Papers
- Back
Deterministic Image-to-Image Translation via Denoising Brownian Bridge Models with Dual Approximators-
[pdf]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Bohan and Wang, Peiyong and He, Qisheng and Dong, Ming},
  title     = {Deterministic Image-to-Image Translation via Denoising {Brownian} Bridge Models with Dual Approximators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28232--28241},
}
Towards Source-Free Machine Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sk Miraj and Basaran, Umit Yigit and Raychaudhuri, Dripta S. and Dutta, Arindam and Kundu, Rohit and Niloy, Fahim Faisal and Guler, Basak and Roy-Chowdhury, Amit K.},
  title     = {Towards Source-Free Machine Unlearning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4948--4957},
}
Uni4D: Unifying Visual Foundation Models for 4D Modeling from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR,
  author    = {Yao, David Yifan and Zhai, Albert J. and Wang, Shenlong},
  title     = {{Uni4D}: Unifying Visual Foundation Models for {4D} Modeling from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1116--1126},
}
DynScene: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied AI-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sangmin and Park, Sungyong and Kim, Heewon},
  title     = {{DynScene}: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12166--12175},
}
DiffLocks: Generating 3D Hair from a Single Image using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosu_2025_CVPR,
  author    = {Rosu, Radu Alexandru and Wu, Keyu and Feng, Yao and Zheng, Youyi and Black, Michael J.},
  title     = {{DiffLocks}: Generating {3D} Hair from a Single Image using Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10847--10857},
}
Hyperbolic Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
  title     = {Hyperbolic Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9891--9900},
}
The Language of Motion: Unifying Verbal and Non-verbal Language of 3D Human Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Changan and Zhang, Juze and Lakshmikanth, Shrinidhi K. and Fang, Yusu and Shao, Ruizhi and Wetzstein, Gordon and Fei-Fei, Li and Adeli, Ehsan},
  title     = {The Language of Motion: Unifying Verbal and Non-verbal Language of {3D} Human Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6200--6211},
}
CALICO: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Kiet A. and Juvekar, Adheesh and Yu, Tianjiao and Wahed, Muntasir and Lourentzou, Ismini},
  title     = {{CALICO}: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4550--4561},
}
Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ziang and Li, Zhilin and He, Yinan and Wang, Chenting and Li, Kunchang and Li, Xinhao and Zeng, Xiangyu and Wang, Zilei and Wang, Yali and Qiao, Yu and Wang, Limin and Wang, Yi},
  title     = {Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29880--29892},
}
Cross-modal Causal Relation Alignment for Video Question Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Weixing and Liu, Yang and Chen, Binglin and Su, Jiandong and Zheng, Yongsen and Lin, Liang},
  title     = {Cross-modal Causal Relation Alignment for Video Question Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24087--24096},
}
Words or Vision: Do Vision-Language Models Have Blind Faith in Text?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Ailin and Cao, Tri and Chen, Zhirui and Hooi, Bryan},
  title     = {Words or Vision: Do Vision-Language Models Have Blind Faith in Text?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3867--3876},
}
Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Ruofan and Gojcic, Zan and Ling, Huan and Munkberg, Jacob and Hasselgren, Jon and Lin, Chih-Hao and Gao, Jun and Keller, Alexander and Vijaykumar, Nandita and Fidler, Sanja and Wang, Zian},
  title     = {Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26069--26080},
}
Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenguang and Shuai, Chao and Fan, Shaojing and Dong, Ziping and Hu, Jinwu and Ba, Zhongjie and Ren, Kui},
  title     = {Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18653--18662},
}
Learning to Detect Objects from Multi-Agent LiDAR Scans without Manual Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Qiming and Lin, Wenkai and Xiang, Haoen and Huang, Xun and Chen, Siheng and Dong, Zhen and Wang, Cheng and Wen, Chenglu},
  title     = {Learning to Detect Objects from Multi-Agent {LiDAR} Scans without Manual Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1418--1428},
}
DeepLA-Net: Very Deep Local Aggregation Networks for Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Ziyin and Dong, Mingyue and Zhou, Jian and Qiu, Huan and Dong, Zhen and Luo, Man and Li, Bijun},
  title     = {{DeepLA-Net}: Very Deep Local Aggregation Networks for Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1330--1341},
}
Multi-Layer Visual Feature Fusion in Multimodal LLMs: Methods, Analysis, and Best Practices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Junyan and Chen, Haoran and Fan, Yue and Fan, Yingqi and Jin, Xin and Su, Hui and Fu, Jinlan and Shen, Xiaoyu},
  title     = {Multi-Layer Visual Feature Fusion in Multimodal {LLMs}: Methods, Analysis, and Best Practices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4156--4166},
}
APHQ-ViT: Post-Training Quantization with Average Perturbation Hessian Based Reconstruction for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhuguanyu and Zhang, Jiayi and Chen, Jiaxin and Guo, Jinyang and Huang, Di and Wang, Yunhong},
  title     = {{APHQ-ViT}: Post-Training Quantization with Average Perturbation {Hessian} Based Reconstruction for Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9686--9695},
}
AdaptCMVC: Robust Adaption to Incremental Views in Continual Multi-view Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jing and Feng, Songhe and Wickstr{\o}m, Kristoffer Knutsen and Kampffmeyer, Michael C.},
  title     = {{AdaptCMVC}: Robust Adaption to Incremental Views in Continual Multi-view Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10285--10294},
}
Omni-Scene: Omni-Gaussian Representation for Ego-Centric Sparse-View Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dongxu and Li, Zhiqi and Liu, Peidong},
  title     = {{Omni-Scene}: Omni-{Gaussian} Representation for Ego-Centric Sparse-View Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22317--22327},
}
3DTopia-XL: Scaling High-quality 3D Asset Generation via Primitive Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaoxi and Tang, Jiaxiang and Dong, Yuhao and Cao, Ziang and Hong, Fangzhou and Lan, Yushi and Wang, Tengfei and Xie, Haozhe and Wu, Tong and Saito, Shunsuke and Pan, Liang and Lin, Dahua and Liu, Ziwei},
  title     = {{3DTopia-XL}: Scaling High-quality {3D} Asset Generation via Primitive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26576--26586},
}
UA-Pose: Uncertainty-Aware 6D Object Pose Estimation and Online Object Completion with Partial References-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Ming-Feng and Yang, Xin and Wang, Fu-En and Basak, Hritam and Sun, Yuyin and Gayaka, Shreekant and Sun, Min and Kuo, Cheng-Hao},
  title     = {{UA-Pose}: Uncertainty-Aware {6D} Object Pose Estimation and Online Object Completion with Partial References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1180--1189},
}
Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Yu, Jing and Gai, Keke and Zhuang, Jiamin and Xiong, Gang and Gou, Gaopeng and Wu, Qi},
  title     = {Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24785--24795},
}
Binarized Mamba-Transformer for Lightweight Quad Bayer HybridEVS Demosaicing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shiyang and Zeng, Haijin and Lu, Yunfan and Shao, Tong and Tang, Ke and Chen, Yongyong and Liu, Jie and Su, Jingyong},
  title     = {Binarized {Mamba}-Transformer for Lightweight Quad {Bayer} {HybridEVS} Demosaicing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8817--8827},
}
DiffSensei: Bridging Multi-Modal LLMs and Diffusion Models for Customized Manga Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianzong and Tang, Chao and Wang, Jingbo and Zeng, Yanhong and Li, Xiangtai and Tong, Yunhai},
  title     = {{DiffSensei}: Bridging Multi-Modal {LLMs} and Diffusion Models for Customized Manga Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28684--28693},
}
Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hur_2025_CVPR,
  author    = {Hur, Chan and Hong, Jeong-hun and Lee, Dong-hun and Kang, Dabin and Myeong, Semin and Park, Sang-hyo and Park, Hyeyoung},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24077--24086},
}
IDEA-Bench: How Far are Generative Models from Professional Designing?-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Chen and Huang, Lianghua and Fang, Jingwu and Dou, Huanzhang and Wang, Wei and Wu, Zhi-Fan and Shi, Yupeng and Zhang, Junge and Zhao, Xin and Liu, Yu},
  title     = {{IDEA-Bench}: How Far are Generative Models from Professional Designing?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18541--18551},
}
Interpretable Image Classification via Non-parametric Part Prototype Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zhijie and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Interpretable Image Classification via Non-parametric Part Prototype Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9762--9771},
}
PhD: A ChatGPT-Prompted Visual Hallucination Evaluation Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiazhen and Fu, Yuhan and Xie, Ruobing and Xie, Runquan and Sun, Xingwu and Lian, Fengzong and Kang, Zhanhui and Li, Xirong},
  title     = {{PhD}: A {ChatGPT}-Prompted Visual Hallucination Evaluation Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19857--19866},
}
CARL: A Framework for Equivariant Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Greer_2025_CVPR,
  author    = {Greer, Hastings and Tian, Lin and Vialard, Fran{\c{c}}ois-Xavier and Kwitt, Roland and Estepar, Raul San Jose and Niethammer, Marc},
  title     = {{CARL}: A Framework for Equivariant Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26014--26023},
}
ClimbingCap: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ming and Lin, Xincheng and Luo, Yuhua and Fan, Shuqi and Dai, Yudi and Zhong, Qixin and Zhong, Lincai and Ma, Yuexin and Xu, Lan and Wen, Chenglu and Shen, Siqi and Wang, Cheng},
  title     = {{ClimbingCap}: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12312--12323},
}
DAGSM: Disentangled Avatar Generation with GS-enhanced Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Jingyu and Kang, Di and Bao, Linchao and Lin, Liang and Li, Guanbin},
  title     = {{DAGSM}: Disentangled Avatar Generation with {GS}-enhanced Mesh},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {292--303},
}
Estimating Body and Hand Motion in an Ego-sensed World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Brent and Ye, Vickie and Zheng, Maya and Li, Yunqi and M{\"u}ller, Lea and Pavlakos, Georgios and Ma, Yi and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Estimating Body and Hand Motion in an Ego-sensed World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7072--7084},
}
A Bias-Free Training Paradigm for More General AI-generated Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guillaro_2025_CVPR,
  author    = {Guillaro, Fabrizio and Zingarini, Giada and Usman, Ben and Sud, Avneesh and Cozzolino, Davide and Verdoliva, Luisa},
  title     = {A Bias-Free Training Paradigm for More General {AI}-generated Image Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18685--18694},
}
FALCON: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Truong_2025_CVPR,
  author    = {Truong, Thanh-Dat and Prabhu, Utsav and Raj, Bhiksha and Cothren, Jackson and Luu, Khoa},
  title     = {{FALCON}: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15065--15075},
}
Certified Human Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahari_2025_CVPR,
  author    = {Bahari, Mohammadhossein and Saadatnejad, Saeed and Farsangi, Amirhossein Askari and Moosavi-Dezfooli, Seyed-Mohsen and Alahi, Alexandre},
  title     = {Certified Human Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12301--12311},
}
Evaluating Vision-Language Models as Evaluators in Path Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aghzal_2025_CVPR,
  author    = {Aghzal, Mohamed and Yue, Xiang and Plaku, Erion and Yao, Ziyu},
  title     = {Evaluating Vision-Language Models as Evaluators in Path Planning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6886--6897},
}
Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online EM-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online {EM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9538--9548},
}
Transformers without Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jiachen and Chen, Xinlei and He, Kaiming and LeCun, Yann and Liu, Zhuang},
  title     = {Transformers without Normalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14901--14911},
}
SGC-Net: Stratified Granular Comparison Network for Open-Vocabulary HOI Detection-
[pdf]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Xin and Shi, Chong and Yang, Zuopeng and Tang, Haojin and Zhou, Zhili},
  title     = {{SGC-Net}: Stratified Granular Comparison Network for Open-Vocabulary {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4539--4549},
}
Galaxy Walker: Geometry-aware VLMs For Galaxy-scale Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianyu and Fu, Xingcheng and Gao, Yisen and Qian, Haodong and Wei, Yuecen and Yan, Kun and Zhou, Haoyi and Li, Jianxin},
  title     = {Galaxy {Walker}: Geometry-aware {VLMs} For Galaxy-scale Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4112--4121},
}
HiPART: Hierarchical Pose AutoRegressive Transformer for Occluded 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hongwei and Li, Han and Dai, Wenrui and Zheng, Ziyang and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {{HiPART}: Hierarchical Pose AutoRegressive Transformer for Occluded {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16807--16817},
}
SnowMaster: Comprehensive Real-world Image Desnowing via MLLM with Multi-Model Feedback Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Jianyu and Chen, Sixiang and Lin, Yunlong and Ye, Tian and Liu, Yun and Fei, Song and Xing, Zhaohu and Wu, Hongtao and Wang, Weiming and Zhu, Lei},
  title     = {{SnowMaster}: Comprehensive Real-world Image Desnowing via {MLLM} with Multi-Model Feedback Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4302--4312},
}
From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ji-Hoon and Choi, Jeongsoo and Kim, Jaehun and Jung, Chaeyoung and Chung, Joon Son},
  title     = {From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15874--15884},
}
DFM: Differentiable Feature Matching for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sheng and Wang, Yimi and Liu, Xudong and Yang, Yuguang and Wang, Runqi and Guo, Guodong and Doermann, David and Zhang, Baochang},
  title     = {{DFM}: Differentiable Feature Matching for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15224--15233},
}
FlashGS: Efficient 3D Gaussian Splatting for Large-scale and High-resolution Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Guofeng and Chen, Siyan and Fu, Rong and Liao, Zimu and Wang, Yi and Liu, Tao and Hu, Boni and Xu, Linning and Pei, Zhilin and Li, Hengjie and Li, Xiuhong and Sun, Ninghui and Zhang, Xingcheng and Dai, Bo},
  title     = {{FlashGS}: Efficient {3D} {Gaussian} Splatting for Large-scale and High-resolution Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26652--26662},
}
PointSR: Self-Regularized Point Supervision for Drone-View Object Detection-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Weizhuo and Xi, Yue and Jia, Wenjing and Zhang, Zehao and Li, Fei and Liu, Xiangzeng and Miao, Qiguang},
  title     = {{PointSR}: Self-Regularized Point Supervision for Drone-View Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11707--11716},
}
Exploring Timeline Control for Facial Motion Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yifeng and Qi, Jinwei and Ji, Chaonan and Zhang, Peng and Zhang, Bang and Deng, Zhidong and Bo, Liefeng},
  title     = {Exploring Timeline Control for Facial Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1940--1950},
}
v-CLR: View-Consistent Learning for Open-World Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Ni, Jinhong and Zhong, Yujie and Han, Kai},
  title     = {{v-CLR}: View-Consistent Learning for Open-World Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20307--20317},
}
Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ronghuan and Su, Wanchao and Liao, Jing},
  title     = {{Chat2SVG}: Vector Graphics Generation with Large Language Models and Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23690--23700},
}
GAF: Gaussian Avatar Reconstruction from Monocular Videos via Multi-view Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiapeng and Davoli, Davide and Kirschstein, Tobias and Schoneveld, Liam and Nie{\ss}ner, Matthias},
  title     = {{GAF}: {Gaussian} Avatar Reconstruction from Monocular Videos via Multi-view Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5546--5558},
}
Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Siyan and Wang, Shuzhe and Liu, Shaohui and Cai, Lulu and Fan, Qingnan and Kannala, Juho and Yang, Yanchao},
  title     = {Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16739--16752},
}
AI-Face: A Million-Scale Demographically Annotated AI-Generated Face Dataset and Fairness Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Li and Santosh, Santosh and Wu, Mingyang and Wang, Xin and Hu, Shu},
  title     = {{AI-Face}: A Million-Scale Demographically Annotated {AI}-Generated Face Dataset and Fairness Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3503--3515},
}
Inference-Scale Complexity in ANN-SNN Conversion for High-Performance and Low-Power Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bu_2025_CVPR,
  author    = {Bu, Tong and Li, Maohua and Yu, Zhaofei},
  title     = {Inference-Scale Complexity in {ANN-SNN} Conversion for High-Performance and Low-Power Applications},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24387--24397},
}
Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chengyue and Chen, Xiaokang and Wu, Zhiyu and Ma, Yiyang and Liu, Xingchao and Pan, Zizheng and Liu, Wen and Xie, Zhenda and Yu, Xingkai and Ruan, Chong and Luo, Ping},
  title     = {Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12966--12977},
}
MVDoppler-Pose: Multi-Modal Multi-View mmWave Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jaeho and Hor, Soheil and Yang, Shubo and Arbabian, Amin},
  title     = {{MVDoppler-Pose}: Multi-Modal Multi-View {mmWave} Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27750--27759},
}
TopNet: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinjie and Zhang, Yifan and Liu, Ting and Liu, Xinpu and Xu, Ke and Wan, Jianwei and Guo, Yulan and Wang, Hanyun},
  title     = {{TopNet}: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27305--27314},
}
MagicArticulate: Make Your 3D Models Articulation-Ready-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Chaoyue and Zhang, Jianfeng and Li, Xiu and Yang, Fan and Chen, Yiwen and Xu, Zhongcong and Liew, Jun Hao and Guo, Xiaoyang and Liu, Fayao and Feng, Jiashi and Lin, Guosheng},
  title     = {{MagicArticulate}: Make Your {3D} Models Articulation-Ready},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15998--16007},
}
Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhou and Feng, Mingtao and Huang, Tao and Wu, Fangfang and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25497--25507},
}
Enhancing Video-LLM Reasoning via Agent-of-Thoughts Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Xie, Weidi},
  title     = {Enhancing {Video-LLM} Reasoning via Agent-of-Thoughts Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8523--8533},
}
De^2Gaze: Deformable and Decoupled Representation Learning for 3D Gaze Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yunfeng and Bai, Xiaowei and Chen, Baojun and Su, Hao and He, Hao and Xie, Liang and Yin, Erwei},
  title     = {{De$^2$Gaze}: Deformable and Decoupled Representation Learning for {3D} Gaze Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3091--3100},
}
ReCapture: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, David Junhao and Paiss, Roni and Zada, Shiran and Karnad, Nikhil and Jacobs, David E. and Pritch, Yael and Mosseri, Inbar and Shou, Mike Zheng and Wadhwa, Neal and Ruiz, Nataniel},
  title     = {{ReCapture}: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2050--2062},
}
M^3-VOS: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Li, Jiaxin and Liang, Junxuan and Tan, Liming and Guo, Yejie and Lu, Cewu and Li, Yong-Lu},
  title     = {{M$^3$-VOS}: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29193--29202},
}
Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Huiyi and Lu, Haodong and Yao, Lina and Gong, Dong},
  title     = {Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10087--10098},
}
Dual Prompting Image Restoration with Diffusion Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Dehong and Li, Fan and Wang, Zhixin and Xu, Jiaqi and Pei, Renjing and Li, Wenbo and Ren, WenQi},
  title     = {Dual Prompting Image Restoration with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12809--12819},
}
Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Ziqi and Gao, Tao and An, Yisheng and Chen, Ting and Zhang, Jing and Wen, Yuanbo and Liu, Mengkun and Zhang, Qianxi},
  title     = {Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3552--3562},
}
Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liang and Xue, Zhe and Li, Yawen and Liang, Meiyu and Wang, Yan and van den Hengel, Anton and Qi, Yuankai}, title = {Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10295-10304} }
MambaOut: Do We Really Need Mamba for Vision?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Weihao and Wang, Xinchao}, title = {MambaOut: Do We Really Need Mamba for Vision?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4484-4496} }
Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Jiayi and Zhao, Junhao and Du, Chaoqun and Wang, Yulin and Ge, Chunjiang and Ni, Zanlin and Song, Shiji and Shi, Humphrey and Huang, Gao}, title = {Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30503-30513} }
Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Peihua and Zhang, Jiehua and Sheng, Xichun and Yan, Chenggang and Sun, Yaoqi and Fu, Ying and Li, Liang}, title = {Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30566-30576} }
DepthCues: Evaluating Monocular Depth Perception in Large Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Danier_2025_CVPR, author = {Danier, Duolikun and Ayg{\"u}n, Mehmet and Li, Changjian and Bilen, Hakan and Mac Aodha, Oisin}, title = {DepthCues: Evaluating Monocular Depth Perception in Large Vision Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20049-20059} }
A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Duosheng and Zhou, Shihao and Pan, Jinshan and Shi, Jinglei and Qu, Lishen and Yang, Jufeng}, title = {A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28061-28070} }
SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Jiajun and Fei, Fan and Li, Zhihao and Tang, Xiao and Liu, Shiyong and Chen, Youyu and Huang, Binxiao and Chen, Zhenyu and Wu, Xiaofei and Shi, Boxin}, title = {SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16133-16142} }
Seurat: From Moving Points to Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young}, title = {Seurat: From Moving Points to Depth}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7211-7221} }
AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360° Unbounded Scene Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Chung-Ho and Chen, Yang-Jung and Chen, Ying-Huan and Lee, Jie-Ying and Ke, Bo-Hsu and Mu, Chun-Wei Tuan and Huang, Yi-Chuan and Lin, Chin-Yang and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun}, title = {AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360{$^\circ$} Unbounded Scene Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16366-16376} }
Language-Guided Image Tokenization for Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR, author = {Zha, Kaiwen and Yu, Lijun and Fathi, Alireza and Ross, David A. and Schmid, Cordelia and Katabi, Dina and Gu, Xiuye}, title = {Language-Guided Image Tokenization for Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15713-15722} }
Img-Diff: Contrastive Data Synthesis for Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jiao_2025_CVPR, author = {Jiao, Qirui and Chen, Daoyuan and Huang, Yilun and Ding, Bolin and Li, Yaliang and Shen, Ying}, title = {Img-Diff: Contrastive Data Synthesis for Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9296-9307} }
CocoER: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Xuli and Cai, Hua and Shen, Weilin and Xu, Qing and Yu, Dingding and Ge, Weifeng and Xue, Xiangyang}, title = {CocoER: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29591-29600} }
Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sur_2025_CVPR, author = {Sur, Tanuj and Mukherjee, Samrat and Rahaman, Kaizer and Chaudhuri, Subhasis and Khan, Muhammad Haris and Banerjee, Biplab}, title = {Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11810-11821} }
Enhancing Creative Generation on Stable Diffusion-based Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Jiyeon and Kwon, Dahee and Lee, Gayoung and Kim, Junho and Choi, Jaesik}, title = {Enhancing Creative Generation on Stable Diffusion-based Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28609-28618} }
The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Bingjie and Gao, Xinyu and Wu, Xiaoxue and Zhou, Yujie and Qiao, Yu and Niu, Li and Chen, Xinyuan and Wang, Yaohui}, title = {The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3173-3183} }
Denoising Functional Maps: Diffusion Models for Shape Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuravlev_2025_CVPR, author = {Zhuravlev, Aleksei and L{\"a}hner, Zorah and Golyanik, Vladislav}, title = {Denoising Functional Maps: Diffusion Models for Shape Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26899-26909} }
ProReflow: Progressive Reflow with Decomposed Velocity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Lei and Xu, Haohang and Ning, Xuefei and Li, Yu and Li, Jiajun and Li, Haoling and Lin, Yuxuan and Jiang, Dongsheng and Yang, Yujiu and Zhang, Linfeng}, title = {ProReflow: Progressive Reflow with Decomposed Velocity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28029-28038} }
DnLUT: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Sidi and Huang, Binxiao and Zhang, Yulun and Yu, Dahai and Yang, Yujiu and Wong, Ngai}, title = {DnLUT: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7582-7591} }
Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Jo_2025_CVPR, author = {Jo, Kyungmin and Yun, Jooyeol and Choo, Jaegul}, title = {Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23595-23603} }
D^3-Human: Dynamic Disentangled Digital Human from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong}, title = {{D$^3$-Human}: Dynamic Disentangled Digital Human from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10836-10846} }
BiM-VFI: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2025_CVPR, author = {Seo, Wonyong and Oh, Jihyong and Kim, Munchurl}, title = {BiM-VFI: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7244-7253} }
Curriculum Coarse-to-Fine Selection for High-IPC Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yanda and Chen, Gongwei and Zhang, Miao and Guan, Weili and Nie, Liqiang}, title = {Curriculum Coarse-to-Fine Selection for High-IPC Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20437-20446} }
BADGR: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuguang and Boyadzhiev, Ivaylo and Liu, Zixuan and Shapiro, Linda and Colburn, Alex}, title = {BADGR: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16785-16795} }
Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2025_CVPR, author = {Bae, Jongseong and Ha, Junwoo and Kim, Ha Young}, title = {Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11939-11948} }
MetaShadow: Object-Centered Shadow Detection, Removal, and Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tianyu and Zhang, Jianming and Zheng, Haitian and Ding, Zhihong and Cohen, Scott and Lin, Zhe and Xiong, Wei and Fu, Chi-Wing and Figueroa, Luis and Kim, Soo Ye}, title = {MetaShadow: Object-Centered Shadow Detection, Removal, and Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28252-28262} }
TANGO: Training-free Embodied AI Agents for Open-world Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziliotto_2025_CVPR, author = {Ziliotto, Filippo and Campari, Tommaso and Serafini, Luciano and Ballan, Lamberto}, title = {TANGO: Training-free Embodied AI Agents for Open-world Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24603-24613} }
SATA: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nikzad_2025_CVPR, author = {Nikzad, Nick and Liao, Yi and Gao, Yongsheng and Zhou, Jun}, title = {SATA: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9730-9739} }
DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xiaofu and Luo, Yaxin and Luo, Gen and Ji, Jiayi and Ding, Henghui and Zhou, Yiyi}, title = {DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14347-14357} }
Nested Diffusion Models Using Hierarchical Latent Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiao and Jiang, Ruoxi and Willett, Rebecca and Maire, Michael}, title = {Nested Diffusion Models Using Hierarchical Latent Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2502-2512} }
A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dexuan and Westfechtel, Thomas and Harada, Tatsuya}, title = {A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10142-10152} }
HiLoTs: High-Low Temporal Sensitive Representation Learning for Semi-Supervised LiDAR Segmentation in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, R. D. and Weng, Pengcheng and Wang, Yinqiao and Ding, Han and Han, Jinsong and Wang, Fei}, title = {HiLoTs: High-Low Temporal Sensitive Representation Learning for Semi-Supervised LiDAR Segmentation in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1429-1438} }
Spiking Transformer with Spatial-Temporal Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Donghyun and Li, Yuhang and Kim, Youngeun and Xiao, Shiting and Panda, Priyadarshini}, title = {Spiking Transformer with Spatial-Temporal Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13948-13958} }
Perceptual Video Compression with Neural Wrapping-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2025_CVPR, author = {Khan, Muhammad Umar Karim and Chadha, Aaron and Anam, Mohammad Ashraful and Andreopoulos, Yiannis}, title = {Perceptual Video Compression with Neural Wrapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17743-17754} }
ViKIENet: Towards Efficient 3D Object Detection with Virtual Key Instance Enhanced Network-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Zhuochen and Qiu, Bijie and Khong, Andy W. H.}, title = {ViKIENet: Towards Efficient 3D Object Detection with Virtual Key Instance Enhanced Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11844-11853} }
DKDM: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR, author = {Xiang, Qianlong and Zhang, Miao and Shang, Yuzhang and Wu, Jianlong and Yan, Yan and Nie, Liqiang}, title = {DKDM: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2955-2965} }
SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Hongrui and Jiang, Chaoya and Xu, Haiyang and Ye, Wei and Dong, Mengfan and Yan, Ming and Zhang, Ji and Huang, Fei and Zhang, Shikun}, title = {SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9361-9371} }
Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaoyi and Zhang, Huan}, title = {Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25060-25070} }
Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Chanhui and Song, Yeonghwan and Son, Jeany}, title = {Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13907-13916} }
Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zefeng and Tang, Hengzhu and Sheng, Jiawei and Zhang, Zhenyu and Ren, Yiming and Li, Zhenyang and Yin, Dawei and Ma, Duohe and Liu, Tingwen}, title = {Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9423-9433} }
SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuji and Xu, Haoran and Liu, Yong and Li, Jiaze and Tang, Yansong}, title = {SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28932-28941} }
GIVEPose: Gradual Intra-class Variation Elimination for RGB-based Category-Level Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Ziqin and Wang, Gu and Zhang, Chenyangguang and Zhang, Ruida and Li, Xiu and Ji, Xiangyang}, title = {GIVEPose: Gradual Intra-class Variation Elimination for RGB-based Category-Level Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22055-22066} }
FRAME: Floor-aligned Representation for Avatar Motion from Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Camiletto_2025_CVPR, author = {Camiletto, Andrea Boscolo and Wang, Jian and Alvarado, Eduardo and Dabral, Rishabh and Beeler, Thabo and Habermann, Marc and Theobalt, Christian}, title = {FRAME: Floor-aligned Representation for Avatar Motion from Egocentric Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17497-17507} }
Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sain_2025_CVPR, author = {Sain, Aneeshan and Maity, Subhajit and Chowdhury, Pinaki Nath and Koley, Shubhadeep and Bhunia, Ayan Kumar and Song, Yi-Zhe}, title = {Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28383-28393} }
Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Libiao and Nie, Dong and Pan, Junjun and Yan, Jing and Tang, Zhenyu}, title = {Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20286-20295} }
Feat2GS: Probing Visual Foundation Models with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yue and Chen, Xingyu and Chen, Anpei and Pons-Moll, Gerard and Xiu, Yuliang}, title = {Feat2GS: Probing Visual Foundation Models with Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6348-6361} }
Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural ODEs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Sijie and She, Rui and Kang, Qiyu and Li, Siqi and Li, Disheng and Geng, Tianyu and Yu, Shangshu and Tay, Wee Peng}, title = {Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural ODEs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11717-11728} }
Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Wazir_2025_CVPR, author = {Wazir, Saad and Kim, Daeyoung}, title = {Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30861-30871} }
MaDCoW: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Kevin and Huang, Jia-Bin and Echevarria, Jose and DiVerdi, Stephen and Hertzmann, Aaron}, title = {MaDCoW: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10923-10932} }
SynTab-LLaVA: Enhancing Multimodal Table Understanding with Decoupled Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Bangbang and Gao, Zuan and Wang, Zixiao and Zhang, Boqiang and Wang, Yuxin and Chen, Zhineng and Xie, Hongtao}, title = {SynTab-LLaVA: Enhancing Multimodal Table Understanding with Decoupled Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24796-24806} }
Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanhui and Zhang, Yihua and Bai, Ruizheng and Zhao, Yue and Liu, Sijia and Tu, Zhengzhong}, title = {Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23806-23816} }
Any6D: Model-free 6D Pose Estimation of Novel Objects-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Taeyeop and Wen, Bowen and Kang, Minjun and Kang, Gyuree and Kweon, In So and Yoon, Kuk-Jin}, title = {Any6D: Model-free 6D Pose Estimation of Novel Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11633-11643} }
Improving Accuracy and Calibration via Differentiated Deep Mutual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Han and Cui, Peng and Wang, Bingning and Chen, Weipeng and Zhang, Yupeng and Zhu, Jun and Hu, Xiaolin}, title = {Improving Accuracy and Calibration via Differentiated Deep Mutual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25812-25821} }
DrVideo: Document Retrieval Based Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Ziyu and Gou, Chenhui and Shi, Hengcan and Sun, Bin and Li, Shutao and Rezatofighi, Hamid and Cai, Jianfei}, title = {DrVideo: Document Retrieval Based Long Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18936-18946} }
Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ye and Zhao, Yanchao and Zhu, Chengcheng and Zhang, Jiale}, title = {Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25770-25779} }
Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2025_CVPR, author = {Kuang, Zhengfei and Zhang, Tianyuan and Zhang, Kai and Tan, Hao and Bi, Sai and Hu, Yiwei and Xu, Zexiang and Hasan, Milos and Wetzstein, Gordon and Luan, Fujun}, title = {Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17660-17670} }
PSHuman: Photorealistic Single-image 3D Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Peng and Zheng, Wangguandong and Liu, Yuan and Yu, Tao and Li, Yangguang and Qi, Xingqun and Chi, Xiaowei and Xia, Siyu and Cao, Yan-Pei and Xue, Wei and Luo, Wenhan and Guo, Yike}, title = {PSHuman: Photorealistic Single-image 3D Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16008-16018} }
LSNet: See Large, Focus Small-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang}, title = {LSNet: See Large, Focus Small}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9718-9729} }
DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jinxiu and Lin, Shaoheng and Li, Yinxiao and Yang, Ming-Hsuan}, title = {DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6144-6153} }
Tartan IMU: A Light Foundation Model for Inertial Positioning in Robotics-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shibo and Zhou, Sifan and Blanchard, Raphael and Qiu, Yuheng and Wang, Wenshan and Scherer, Sebastian}, title = {Tartan IMU: A Light Foundation Model for Inertial Positioning in Robotics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22520-22529} }
Event Ellipsometer: Event-based Mueller-Matrix Video Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maeda_2025_CVPR, author = {Maeda, Ryota and Moon, Yunseong and Baek, Seung-Hwan}, title = {Event Ellipsometer: Event-based Mueller-Matrix Video Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21804-21813} }
DocLayLLM: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Wenhui and Wang, Jiapeng and Li, Hongliang and Wang, Chengyu and Huang, Jun and Jin, Lianwen}, title = {DocLayLLM: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4038-4049} }
EDEN: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zihao and Chen, Haoran and Zhao, Haoyu and Lu, Guansong and Fu, Yanwei and Xu, Hang and Wu, Zuxuan}, title = {EDEN: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2105-2115} }
Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hao and Yang, Xin and Zhang, Le and Gu, Hanlin and Li, Tianrui and Fan, Lixin and Yang, Qiang}, title = {Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4874-4883} }
DeSiRe-GS: 4D Street Gaussians for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Chensheng and Zhang, Chengwei and Wang, Yixiao and Xu, Chenfeng and Xie, Yichen and Zheng, Wenzhao and Keutzer, Kurt and Tomizuka, Masayoshi and Zhan, Wei}, title = {DeSiRe-GS: 4D Street Gaussians for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6782-6791} }
End-to-End HOI Reconstruction Transformer with Graph-based Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhenrong and Zheng, Qi and Ma, Sihan and Ye, Maosheng and Zhan, Yibing and Li, Dongjiang}, title = {End-to-End HOI Reconstruction Transformer with Graph-based Encoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27706-27715} }
REWIND: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jihyun and Xu, Weipeng and Richard, Alexander and Wei, Shih-En and Saito, Shunsuke and Bai, Shaojie and Wang, Te-Li and Sung, Minhyuk and Kim, Tae-Kyun and Saragih, Jason}, title = {REWIND: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7095-7104} }
Hiding Images in Diffusion Models by Editing Learned Score Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoyu and Yang, Yunqiao and Zhong, Nan and Ma, Kede},
  title     = {Hiding Images in Diffusion Models by Editing Learned Score Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18663--18673},
}
Disco4D: Disentangled 4D Human Generation and Animation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Hui En and Liu, Shuai and Cai, Zhongang and Yang, Lei and Zhang, Tianwei and Liu, Ziwei},
  title     = {{Disco4D}: Disentangled {4D} Human Generation and Animation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26331--26344},
}
DoraCycle: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Rui and Mao, Weijia and Shou, Mike Zheng},
  title     = {{DoraCycle}: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2835--2846},
}
WeatherGen: A Unified Diverse Weather Generator for LiDAR Point Clouds via Spider Mamba Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yang and Zhu, Yun and Zhang, Kaihua and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {{WeatherGen}: A Unified Diverse Weather Generator for {LiDAR} Point Clouds via Spider {Mamba} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17019--17028},
}
MUST: The First Dataset and Unified Framework for Multispectral UAV Single Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Haolin and Xu, Tingfa and Li, Tianhao and Chen, Zhenxiang and Feng, Tao and Li, Jianan},
  title     = {{MUST}: The First Dataset and Unified Framework for Multispectral {UAV} Single Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16882--16891},
}
IDOL: Instant Photorealistic 3D Human Creation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Yiyu and Lv, Jiaxi and Wen, Hao and Shuai, Qing and Zeng, Ailing and Zhu, Hao and Chen, Shifeng and Yang, Yujiu and Cao, Xun and Liu, Wei},
  title     = {{IDOL}: Instant Photorealistic {3D} Human Creation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26308--26319},
}
Tightening Robustness Verification of MaxPool-based Neural Networks via Minimizing the Over-Approximation Zone-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yuan and Chen, Yuchen and Ma, Shiqing and Fang, Chunrong and Bai, Tongtong and Gu, Mingzheng and Cheng, Yuxin and Chen, Yanwei and Chen, Zhenyu},
  title     = {Tightening Robustness Verification of {MaxPool}-based Neural Networks via Minimizing the Over-Approximation Zone},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20695--20705},
}
SketchVideo: Sketch-based Video Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Feng-Lin and Fu, Hongbo and Wang, Xintao and Ye, Weicai and Wan, Pengfei and Zhang, Di and Gao, Lin},
  title     = {{SketchVideo}: Sketch-based Video Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23379--23390},
}
PhysicsGen: Can Generative Models Learn from Images to Predict Complex Physical Relations?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spitznagel_2025_CVPR,
  author    = {Spitznagel, Martin and Vaillant, Jan and Keuper, Janis},
  title     = {{PhysicsGen}: Can Generative Models Learn from Images to Predict Complex Physical Relations?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11125--11134},
}
Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Maochen and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24440--24451},
}
Gaussian Splashing: Unified Particles for Versatile Motion Synthesis and Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yutao and Feng, Xiang and Shang, Yintong and Jiang, Ying and Yu, Chang and Zong, Zeshun and Shao, Tianjia and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{Gaussian} Splashing: Unified Particles for Versatile Motion Synthesis and Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {518--529},
}
Improve Representation for Imbalanced Regression through Geometric Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zijian and Wu, Yilei and Chen, Chongyao and Zou, Yingtian and Zhang, Yichi and Zhou, Juan Helen},
  title     = {Improve Representation for Imbalanced Regression through Geometric Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5082--5091},
}
AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinghui and Sun, Qichao and Zhang, Pengze and Ye, Fulong and Liao, Zhichao and Feng, Wanquan and Zhao, Songtao and He, Qian},
  title     = {{AnyDressing}: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23723--23733},
}
Spectral Informed Mamba for Robust Point Cloud Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahri_2025_CVPR,
  author    = {Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Dastani, Sahar and Cheraghalikhani, Milad and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Beizaee, Farzad and Ben Ayed, Ismail and Desrosiers, Christian},
  title     = {Spectral Informed {Mamba} for Robust Point Cloud Processing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11799--11809},
}
Latent Space Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Souza_2025_CVPR,
  author    = {Souza, Matheus and Zheng, Yidan and Kang, Kaizhang and Mishra, Yogeshwar Nath and Fu, Qiang and Heidrich, Wolfgang},
  title     = {Latent Space Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28295--28305},
}
Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiran and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30577--30587},
}
Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast MR Image Synthesis via Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2025_CVPR,
  author    = {Shin, Yejee and Lee, Yeeun and Jang, Hanbyol and Son, Geonhui and Kim, Hyeongyu and Hwang, Dosik},
  title     = {Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast {MR} Image Synthesis via Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30918--30927},
}
BlobGEN-Vid: Compositional Text-to-Video Generation with Blob Video Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Weixi and Liu, Chao and Liu, Sifei and Wang, William Yang and Vahdat, Arash and Nie, Weili},
  title     = {{BlobGEN-Vid}: Compositional Text-to-Video Generation with Blob Video Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12989--12998},
}
D2SP: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition.-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoran and Mai, Xinji and Tao, Zeng and Tong, Xuan and Lin, Junxiong and Wang, Yan and Yu, Jiawen and Yan, Shaoqi and Zhou, Ziheng and Zhang, Wenqiang},
  title     = {{D2SP}: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19218--19229},
}
PartRM: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Mingju and Pan, Yike and Gao, Huan-ang and Zhang, Zongzheng and Li, Wenyi and Dong, Hao and Tang, Hao and Yi, Li and Zhao, Hao},
  title     = {{PartRM}: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7004--7014},
}
LaVin-DiT: Large Vision Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhaoqing and Xia, Xiaobo and Chen, Runnan and Yu, Dongdong and Wang, Changhu and Gong, Mingming and Liu, Tongliang},
  title     = {{LaVin-DiT}: Large Vision Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20060--20070},
}
DiffFNO: Diffusion Fourier Neural Operator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiaoyi and Tang, Hao},
  title     = {{DiffFNO}: Diffusion {Fourier} Neural Operator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {150--160},
}
CAP-Net: A Unified Network for 6D Pose and Size Estimation of Categorical Articulated Parts from a Single RGB-D Image-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jingshun and Lin, Haitao and Wang, Tianyu and Fu, Yanwei and Xue, Xiangyang and Zhu, Yi},
  title     = {{CAP-Net}: A Unified Network for {6D} Pose and Size Estimation of Categorical Articulated Parts from a Single {RGB-D} Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11654--11664},
}
SeCap: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shining and Wang, Yunlong and Wu, Ruiqi and Jiao, Bingliang and Wang, Wenxuan and Wang, Peng},
  title     = {{SeCap}: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22119--22128},
}
Zero-Shot Styled Text Image Generation, but Make It Autoregressive-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pippi_2025_CVPR,
  author    = {Pippi, Vittorio and Quattrini, Fabio and Cascianelli, Silvia and Tonioni, Alessio and Cucchiara, Rita},
  title     = {Zero-Shot Styled Text Image Generation, but Make It Autoregressive},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7910--7919},
}
Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Ziying and Jia, Caiyan and Liu, Lin and Pan, Hongyu and Zhang, Yongchang and Wang, Junming and Zhang, Xingyu and Xu, Shaoqing and Yang, Lei and Luo, Yadan},
  title     = {Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22432--22441},
}
Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wenxi and Yeh, Raymond A. and Mou, Shaoshuai and Gu, Yan},
  title     = {Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4724--4733},
}
Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Chenjie and Lu, Weyl and Xu, Yifan and Chen, Yubei},
  title     = {Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27608--27617},
}
Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Zhanhao and Yuan, Yuhui and Gu, Shuyang and Chen, Bohan and Hang, Tiankai and Cheng, Mingxi and Li, Ji and Zheng, Liang},
  title     = {Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13199--13208},
}
Adversarial Diffusion Compression for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Bin and Li, Gehui and Wu, Rongyuan and Zhang, Xindong and Chen, Jie and Zhang, Jian and Zhang, Lei},
  title     = {Adversarial Diffusion Compression for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28208--28220},
}
DiSciPLE: Learning Interpretable Programs for Scientific Visual Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mall_2025_CVPR,
  author    = {Mall, Utkarsh and Phoo, Cheng Perng and Chiquier, Mia and Hariharan, Bharath and Bala, Kavita and Vondrick, Carl},
  title     = {{DiSciPLE}: Learning Interpretable Programs for Scientific Visual Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29258--29267},
}
SOLAMI: Social Vision-Language-Action Modeling for Immersive Interaction with 3D Autonomous Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jianping and Xiao, Weiye and Lin, Zhengyu and Zhang, Huaizhong and Ren, Tianxiang and Gao, Yang and Lin, Zhiqian and Cai, Zhongang and Yang, Lei and Liu, Ziwei},
  title     = {{SOLAMI}: Social Vision-Language-Action Modeling for Immersive Interaction with {3D} Autonomous Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26887--26898},
}
EntropyMark: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Ming and Wang, Rui and Zhu, Zixuan and Jing, Lihua and Guo, Yuanfang},
  title     = {{EntropyMark}: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30692--30701},
}
Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Han and Xie, Yina and Tan, Guanxin and Chen, Yinghao and Hu, Rui and Wang, Ke and Zhou, Aojun and Li, Hao and Shao, Hao and Lu, Xudong and Gao, Peng and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29558--29568},
}
BARD-GS: Blur-Aware Reconstruction of Dynamic Scenes via Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yiren and Zhou, Yunlai and Liu, Disheng and Liang, Tuo and Yin, Yu},
  title     = {{BARD-GS}: Blur-Aware Reconstruction of Dynamic Scenes via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16532--16542},
}
SALAD: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Seokhyeon and Kim, Chaelin and Yoon, Serin and Nam, Junghyun and Cha, Sihun and Noh, Junyong},
  title     = {{SALAD}: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7158--7168},
}
Towards Universal AI-Generated Image Detection by Variational Information Bottleneck Network-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haifeng and He, Qinghui and Bi, Xiuli and Li, Weisheng and Liu, Bo and Xiao, Bin},
  title     = {Towards Universal {AI}-Generated Image Detection by Variational Information Bottleneck Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23828--23837},
}
HSI: A Holistic Style Injector for Arbitrary Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shuhao and Kang, Hui and Liu, Yang and Mei, Fang and Li, Hongjuan},
  title     = {{HSI}: A Holistic Style Injector for Arbitrary Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23433--23442},
}
LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Pascal and Sancho, Sergio and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius},
  title     = {{LookingGlass}: Generative Anamorphoses via {Laplacian} Pyramid Warping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24--33},
}
V2V3D: View-to-View Denoised 3D Reconstruction for Light Field Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jiayin and Fu, Zhenqi and Yu, Tao and Qiao, Hui},
  title     = {{V2V3D}: View-to-View Denoised {3D} Reconstruction for Light Field Microscopy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26451--26461},
}
DiN: Diffusion Model for Robust Medical VQA with Semantic Noisy Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Erjian and Zhao, Zhen and Wang, Zicheng and Chen, Tong and Liu, Yunyi and Zhou, Luping},
  title     = {{DiN}: Diffusion Model for Robust Medical {VQA} with Semantic Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14337--14346},
}
Splatter-360: Generalizable 360 Gaussian Splatting for Wide-baseline Panoramic Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zheng and Wu, Chenming and Shen, Zhelun and Zhao, Chen and Ye, Weicai and Feng, Haocheng and Ding, Errui and Zhang, Song-Hai},
  title     = {Splatter-360: Generalizable 360 {Gaussian} Splatting for Wide-baseline Panoramic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21590--21599},
}
ShowMak3r: Compositional TV Show Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sangmin and Do, Seunguk and Park, Jaesik},
  title     = {{ShowMak3r}: Compositional {TV} Show Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {864--874},
}
CADRef: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2025_CVPR,
  author    = {Ling, Zhiwei and Chang, Yachen and Zhao, Hailiang and Zhao, Xinkui and Chow, Kingsum and Deng, Shuiguang},
  title     = {{CADRef}: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4968--4977},
}
S^3-Face: SSS-Compliant Facial Reflectance Estimation via Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Zhu, Wenhan and Yan, Yichao and Yang, Xiaokang and Zafeiriou, Stefanos and Ma, Chao},
  title     = {{S{\textasciicircum}3-Face}: {SSS}-Compliant Facial Reflectance Estimation via Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16051--16060},
}
FSBench: A Figure Skating Benchmark for Advancing Artistic Sports Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Rong and Liu, Xin and Hu, Zhuozhao and Xing, Bohao and Xia, Baiqiang and Yu, Zitong and K{\"a}lvi{\"a}inen, Heikki},
  title     = {{FSBench}: A Figure Skating Benchmark for Advancing Artistic Sports Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13595--13605},
}
Keep the Balance: A Parameter-Efficient Symmetrical Framework for RGB+X Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Jiaxin and Su, Jingze and Li, Qi and Yang, Wenjie and Wang, Shu and Zhao, Tiesong and He, Shengfeng and Liu, Wenxi},
  title     = {Keep the Balance: A Parameter-Efficient Symmetrical Framework for {RGB+X} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10587--10598},
}
VideoDirector: Precise Video Editing via Text-to-Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yukun and Wang, Longguang and Ma, Zhiyuan and Hu, Qibin and Xu, Kai and Guo, Yulan},
  title     = {{VideoDirector}: Precise Video Editing via Text-to-Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2589--2598},
}
LLM-driven Multimodal and Multi-Identity Listening Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Peiwen and Zhong, Weizhi and Qin, Yipeng and Ren, Xiaohang and Wang, Baoyuan and Li, Guanbin},
  title     = {{LLM}-driven Multimodal and Multi-Identity Listening Head Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10656--10666},
}
Towards Understanding How Knowledge Evolves in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Li, Jianing and Wang, Zizhe and Liu, Yanwei and Ji, Xiangyang},
  title     = {Towards Understanding How Knowledge Evolves in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29858--29868},
}
A Unified, Resilient, and Explainable Adversarial Patch Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, Vishesh and Agarwal, Akshay},
  title     = {A Unified, Resilient, and Explainable Adversarial Patch Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30387--30397},
}
VISTA: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Weiming and Yang, Huan and Min, Jie and Wei, Cong and Chen, Wenhu},
  title     = {{VISTA}: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3804--3814},
}
Structured 3D Latents for Scalable and Versatile 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Jianfeng and Lv, Zelong and Xu, Sicheng and Deng, Yu and Wang, Ruicheng and Zhang, Bowen and Chen, Dong and Tong, Xin and Yang, Jiaolong},
  title     = {Structured {3D} Latents for Scalable and Versatile {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21469--21480},
}
GA3CE: Unconstrained 3D Gaze Estimation with Gaze-Aware 3D Context Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawana_2025_CVPR,
  author    = {Kawana, Yuki and Shiba, Shintaro and Kong, Quan and Kobori, Norimasa},
  title     = {{GA3CE}: Unconstrained {3D} Gaze Estimation with Gaze-Aware {3D} Context Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3081--3090},
}
Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Weimin and Wang, Jieke and Tang, Meng},
  title     = {Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23528--23538},
}
RigGS: Rigging of 3D Gaussians for Modeling Articulated Objects in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Yuxin and Deng, Zhi and Hou, Junhui},
  title     = {{RigGS}: Rigging of {3D} {Gaussians} for Modeling Articulated Objects in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5592--5601},
}
Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light RAW Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Feiran and Jiang, Haiyang and Iso, Daisuke},
  title     = {Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light {RAW} Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5699--5708},
}
Adv-CPG: A Customized Portrait Generation Framework with Facial Adversarial Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Zhang, Hongyuan and Yuan, Yuan},
  title     = {{Adv-CPG}: A Customized Portrait Generation Framework with Facial Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21001--21010},
}
Fish-Vista: A Multi-Purpose Dataset for Understanding & Identification of Traits from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Mehrab_2025_CVPR,
  author    = {Mehrab, Kazi Sajeed and Maruf, M. and Daw, Arka and Neog, Abhilash and Manogaran, Harish Babu and Khurana, Mridul and Feng, Zhenyang and Altintas, Bahadir and Bakis, Yasin and Campolongo, Elizabeth G. and Thompson, Matthew J. and Wang, Xiaojun and Lapp, Hilmar and Berger-Wolf, Tanya and Mabee, Paula and Bart, Henry and Chao, Wei-Lun and Dahdul, Wasila M. and Karpatne, Anuj},
  title     = {{Fish-Vista}: A Multi-Purpose Dataset for Understanding \& Identification of Traits from Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24275--24285},
}
High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhaoyi and Wang, Feifeng and Wang, Shiwei and Zhou, Zihao and Zhu, Yao and Shen, Liquan},
  title     = {High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7320--7330},
}
OffsetOPT: Explicit Surface Reconstruction without Normals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Huan},
  title     = {{OffsetOPT}: Explicit Surface Reconstruction without Normals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11729--11738},
}
PCM : Picard Consistency Model for Fast Parallel Sampling of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{So_2025_CVPR,
  author    = {So, Junhyuk and Shin, Jiwoong and Jang, Chaeyeon and Park, Eunhyeok},
  title     = {{PCM}: {Picard} Consistency Model for Fast Parallel Sampling of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23313--23322},
}
CoMapGS: Covisibility Map-based Gaussian Splatting for Sparse Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngkyoon and P{\'e}rez-Pellitero, Eduardo},
  title     = {{CoMapGS}: Covisibility Map-based {Gaussian} Splatting for Sparse Novel View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26779--26788},
}
Any-Resolution AI-Generated Image Detection by Spectral Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karageorgiou_2025_CVPR,
  author    = {Karageorgiou, Dimitrios and Papadopoulos, Symeon and Kompatsiaris, Ioannis and Gavves, Efstratios},
  title     = {Any-Resolution {AI}-Generated Image Detection by Spectral Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18706--18717},
}
DivPrune: Diversity-based Visual Token Pruning for Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alvar_2025_CVPR,
  author    = {Alvar, Saeed Ranjbar and Singh, Gursimran and Akbari, Mohammad and Zhang, Yong},
  title     = {{DivPrune}: Diversity-based Visual Token Pruning for Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9392--9401},
}
Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yuechen and Song, Jie and Wang, Huiqiong and Song, Mingli}, title = {Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23817-23827} }
3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Weijie and {\"U}lger, Osman and Nejadasl, Fatemeh Karimi and Gevers, Theo and Oswald, Martin R.}, title = {3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8910-8920} }
STOP: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zichen and Xu, Kunlun and Su, Bing and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan}, title = {STOP: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13776-13786} }
TimeTracker: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Haoyue and Xu, Jinghan and Chang, Yi and Zhou, Hanyu and Zhao, Haozhi and Wang, Lin and Yan, Luxin}, title = {TimeTracker: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17649-17659} }
Improving the Training of Data-Efficient GANs via Quality Aware Dynamic Discriminator Rejection Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhaoyu and Hua, Yang and Sun, Guanxiong and Wang, Hui and McLoone, Se{\'a}n}, title = {Improving the Training of Data-Efficient GANs via Quality Aware Dynamic Discriminator Rejection Sampling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30682-30691} }
Shading Meets Motion: Self-supervised Indoor 3D Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Guoyu}, title = {Shading Meets Motion: Self-supervised Indoor 3D Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16508-16519} }
Believing is Seeing: Unobserved Object Detection using Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhattacharjee_2025_CVPR, author = {Bhattacharjee, Subhransu S. and Campbell, Dylan and Shome, Rahul}, title = {Believing is Seeing: Unobserved Object Detection using Generative Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19366-19377} }
MotionStone: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Shuwei and Gong, Biao and Chen, Xi and Zheng, Dandan and Tan, Shuai and Yang, Zizheng and Li, Yuyuan and He, Jingwen and Zheng, Kecheng and Chen, Jingdong and Yang, Ming and Zheng, Yinqiang}, title = {MotionStone: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22864-22874} }
NLPrompt: Noise-Label Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Bikang and Li, Qun and Tang, Xiaoying and Huang, Wei and Fang, Zhen and Liu, Feng and Wang, Jingya and Yu, Jingyi and Shi, Ye}, title = {NLPrompt: Noise-Label Prompt Learning for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19963-19973} }
MEGA: Masked Generative Autoencoder for Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fiche_2025_CVPR, author = {Fiche, Gu{\'e}nol{\'e} and Leglaive, Simon and Alameda-Pineda, Xavier and Moreno-Noguer, Francesc}, title = {MEGA: Masked Generative Autoencoder for Human Mesh Recovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5366-5378} }
PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Sean and Basu, Shamik and Broedermann, Tim and Van Gool, Luc and Sakaridis, Christos}, title = {PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10974-10984} }
Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality-
[pdf]
[supp]
[bibtex]@InProceedings{Muthukumar_2025_CVPR, author = {Muthukumar, Ramchandran and Pal, Ambar and Sulam, Jeremias and Vidal, Rene}, title = {Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9954-9963} }
Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuanbo and Shao, Jiahao and Li, Xinyang and Shen, Yujun and Geiger, Andreas and Liao, Yiyi}, title = {Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2857-2869} }
No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition-
[pdf]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Rong and Liu, Xin and Liu, Xingyu and Liu, Jiaxuan and Shi, Jinglei and Lin, Liang and Yang, Jufeng}, title = {No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14965-14975} }
SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Benny_2025_CVPR, author = {Benny, Yaniv and Wolf, Lior}, title = {SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {940-950} }
Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yankai and Zhang, Peng and Yang, Donglin and Tian, Yuan and Lin, Hai and Wang, Xiaosong}, title = {Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25971-25981} }
Towards Generalizable Scene Change Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jae-Woo and Kim, Ue-Hwan}, title = {Towards Generalizable Scene Change Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24463-24473} }
Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuchuan and Kang, Jae-Mo and Kim, Il-Min}, title = {Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10183-10192} }
Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zheyu and Lu, Yayuan and Ma, Feipeng and Zhang, Yueyi and Yue, Huanjing and Sun, Xiaoyan}, title = {Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25982-25992} }
FedAWA: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Changlong and Zhao, He and Zhang, Bingjie and Zhou, Mingyuan and Guo, Dandan and Chang, Yi}, title = {FedAWA: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30651-30660} }
FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro}, title = {FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {326-337} }
HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zitang and Mei, Ke and Lu, Yu and Wang, Tianyi and Rao, Fengyun}, title = {HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3152-3162} }
Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Zichong and Xie, Yiming and Peng, Xiaogang and Han, Zeyu and Jiang, Huaizu}, title = {Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27859-27871} }
StyleMaster: Stylize Your Video with Artistic Generation and Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Zixuan and Huang, Huijuan and Wang, Xintao and Wan, Pengfei and Zhang, Di and Luo, Wenhan}, title = {StyleMaster: Stylize Your Video with Artistic Generation and Translation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2630-2640} }
Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Haopeng and Zhang, Yingwei and Xu, Lumin and Jin, Sheng and Luo, Ping and Qian, Chen and Liu, Wentao and Chen, Yiqiang}, title = {Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10131-10141} }
OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xuanyu and Tang, Zecheng and Xu, Zhipei and Li, Runyi and Xu, Youmin and Chen, Bin and Gao, Feng and Zhang, Jian}, title = {OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3008-3018} }
Open-Canopy: Towards Very High Resolution Forest Monitoring-
[pdf]
[supp]
[bibtex]@InProceedings{Fogel_2025_CVPR, author = {Fogel, Fajwel and Perron, Yohann and Besic, Nikola and Saint-Andr{\'e}, Laurent and Pellissier-Tanon, Agn{\`e}s and Schwartz, Martin and Boudras, Thomas and Fayad, Ibrahim and d'Aspremont, Alexandre and Landrieu, Loic and Ciais, Philippe}, title = {Open-Canopy: Towards Very High Resolution Forest Monitoring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1395-1406} }
ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hao and Si, Guangzong and Wang, Zilei}, title = {ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14625-14634} }
Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sehwag_2025_CVPR, author = {Sehwag, Vikash and Kong, Xianghao and Li, Jingtao and Spranger, Michael and Lyu, Lingjuan}, title = {Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28596-28608} }
Guiding Human-Object Interactions with Rich Geometry and Relations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Mengqing and Liu, Yifei and Guo, Ling and Huang, Shaoli and Ding, Changxing}, title = {Guiding Human-Object Interactions with Rich Geometry and Relations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22714-22723} }
TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yiran and Li, Jiaqi and Hong, Chaoyi and Li, Ruibo and Sun, Liusheng and Song, Xiao and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng}, title = {TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10523-10533} }
Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taketsugu_2025_CVPR, author = {Taketsugu, Hiromu and Oba, Takeru and Maeda, Takahiro and Nobuhara, Shohei and Ukita, Norimichi}, title = {Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12324-12334} }
CADDreamer: CAD Object Generation from Single-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuan and Lin, Cheng and Liu, Yuan and Long, Xiaoxiao and Zhang, Chenxu and Wang, Ningna and Li, Xin and Wang, Wenping and Guo, Xiaohu}, title = {CADDreamer: CAD Object Generation from Single-view Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21448-21457} }
Vision-Language Model IP Protection via Prompt-based Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang}, title = {Vision-Language Model IP Protection via Prompt-based Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9497-9506} }
Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of AI-Generated Content-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Haoyue and Sun, Yiyou and Cheng, Wei and Chen, Haifeng}, title = {Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of AI-Generated Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28821-28830} }
Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiantao and Yang, Xin and Chen, Meixi and Xu, Yingjie and Yan, Dongyu and Wu, Leyi and Xu, Xinli and Xu, Lie and Zhang, Shunsi and Chen, Ying-Cong}, title = {Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5870-5880} }
DiTASK: Multi-Task Fine-Tuning with Diffeomorphic Transformations-
[pdf]
[supp]
[bibtex]@InProceedings{Mantri_2025_CVPR, author = {Mantri, Krishna Sri Ipsit and Sch{\"o}nlieb, Carola-Bibiane and Ribeiro, Bruno and Baskin, Chaim and Eliasof, Moshe}, title = {DiTASK: Multi-Task Fine-Tuning with Diffeomorphic Transformations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25218-25229} }
OW-OVD: Unified Open World and Open Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xi_2025_CVPR, author = {Xi, Xing and Huang, Yangyang and Luo, Ronghua and Qiu, Yu}, title = {OW-OVD: Unified Open World and Open Vocabulary Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25454-25464} }
Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bingliang and Chu, Wenda and Berner, Julius and Meng, Chenlin and Anandkumar, Anima and Song, Yang}, title = {Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20895-20905} }
AvatarArtist: Open-Domain 4D Avatarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hongyu and Wang, Xuan and Wan, Ziyu and Ma, Yue and Chen, Jingye and Fan, Yanbo and Shen, Yujun and Song, Yibing and Chen, Qifeng}, title = {AvatarArtist: Open-Domain 4D Avatarization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10758-10769} }
DesignDiffusion: High-Quality Text-to-Design Image Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhendong and Bao, Jianmin and Gu, Shuyang and Chen, Dong and Zhou, Wengang and Li, Houqiang}, title = {DesignDiffusion: High-Quality Text-to-Design Image Generation with Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20906-20915} }
Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Chen and Shen, Yan and Li, Dan and Wang, Zhongli}, title = {Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18000-18010} }
Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qiuheng and Shi, Yukai and Ou, Jiarong and Chen, Rui and Lin, Ke and Wang, Jiahao and Jiang, Boyuan and Yang, Haotian and Zheng, Mingwu and Tao, Xin and Yang, Fei and Wan, Pengfei and Zhang, Di}, title = {Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8428-8437} }
VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR, author = {Zhuang, Xianwei and Zhu, Zhihong and Xie, Yuxin and Liang, Liming and Zou, Yuexian}, title = {VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4189-4199} }
SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miller_2025_CVPR, author = {Miller, Kevin and Gangrade, Aditya and Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh}, title = {SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4313-4321} }
UniGoal: Towards Universal Zero-shot Goal-oriented Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hang and Xu, Xiuwei and Zhao, Linqing and Wang, Ziwei and Zhou, Jie and Lu, Jiwen}, title = {UniGoal: Towards Universal Zero-shot Goal-oriented Navigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19057-19066} }
Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Kunpeng and Gao, Zhiqiang and Zhou, Zhiying and Sun, Mingjie and Guo, Yongxin}, title = {Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15672-15681} }
DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Jaewoo and Park, Daemin and Baek, Kanghyun and Lee, Sangyub and Choi, Jooyoung and Kim, Eunji and Yoon, Sungroh}, title = {DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18718-18727} }
Less is More: Efficient Image Vectorization with Adaptive Parameterization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Kaibo and Bao, Liang and Li, Yufei and Su, Xu and Zhang, Ke and Qiao, Xiaotian}, title = {Less is More: Efficient Image Vectorization with Adaptive Parameterization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18166-18175} }
FedMIA: An Effective Membership Inference Attack Exploiting "All for One" Principle in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Gongxi and Li, Donghao and Gu, Hanlin and Yao, Yuan and Fan, Lixin and Han, Yuxing}, title = {FedMIA: An Effective Membership Inference Attack Exploiting ``All for One'' Principle in Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20643-20653} }
Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yi and Zhou, Hao and Cui, Benlei and Shang, Wenxiang and Lin, Ran}, title = {Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2418-2427} }
Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Chowdhury_2025_CVPR, author = {Chowdhury, Arpita and Paul, Dipanjyoti and Mai, Zheda and Gu, Jianyang and Zhang, Ziheng and Mehrab, Kazi Sajeed and Campolongo, Elizabeth G. and Rubenstein, Daniel and Stewart, Charles V. and Karpatne, Anuj and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun}, title = {Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4375-4385} }
Instruction-based Image Manipulation by Watching How Things Move-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Mingdeng and Zhang, Xuaner and Zheng, Yinqiang and Xia, Zhihao}, title = {Instruction-based Image Manipulation by Watching How Things Move}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2704-2713} }
DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morimitsu_2025_CVPR, author = {Morimitsu, Henrique and Zhu, Xiaobin and Cesar, Roberto M. and Ji, Xiangyang and Yin, Xu-Cheng}, title = {DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17810-17820} }
DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiao-Hui and Yin, Fei and Liu, Cheng-Lin}, title = {DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15021-15032} }
Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yuhao and Tian, Yuxin and Lv, Jindi and Shi, Mingjia and Li, Yuanxi and Ye, Qing and Zhang, Shuhao and Lv, Jiancheng}, title = {Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4850-4861} }
Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hyung_2025_CVPR, author = {Hyung, Junha and Kim, Kinam and Hong, Susung and Kim, Min-Jung and Choo, Jaegul}, title = {Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11006-11015} }
VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yunlong and Guo, Junjia and Hua, Hang and Liang, Susan and Feng, Mingqian and Li, Xinyang and Mao, Rui and Huang, Chao and Bi, Jing and Zhang, Zeliang and Fazli, Pooyan and Xu, Chenliang}, title = {VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8490-8500} }
SAR3D: Autoregressive 3D Object Generation and Understanding via Multi-scale 3D VQVAE-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yongwei and Lan, Yushi and Zhou, Shangchen and Wang, Tengfei and Pan, Xingang}, title = {SAR3D: Autoregressive 3D Object Generation and Understanding via Multi-scale 3D VQVAE}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28371-28382} }
Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Ying and Peng, Jinlong and He, Qingdong and Hu, Teng and Wu, Jiafu and Chen, Hao and Wang, Haoxuan and Zhu, Wenbing and Chi, Mingmin and Liu, Jun and Wang, Yabiao}, title = {Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30420-30429} }
ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Yahan and Hu, Rui and Sang, Jitao}, title = {ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19836-19845} }
Self-Supervised Learning for Color Spike Camera Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Yanchen and Xiong, Ruiqin and Fan, Xiaopeng and Yu, Zhaofei and Tian, Yonghong and Huang, Tiejun}, title = {Self-Supervised Learning for Color Spike Camera Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6231-6240} }
Interactive Medical Image Analysis with Concept-based Similarity Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huy_2025_CVPR, author = {Huy, Ta Duc and Tran, Sen Kim and Nguyen, Phan and Tran, Nguyen Hoang and Sam, Tran Bao and van den Hengel, Anton and Liao, Zhibin and Verjans, Johan W. and To, Minh-Son and Phan, Vu Minh Hieu}, title = {Interactive Medical Image Analysis with Concept-based Similarity Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30797-30806} }
From Elements to Design: A Layered Approach for Automatic Graphic Design Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiawei and Sun, Shizhao and Huang, Danqing and Liu, Ting and Li, Ji and Bian, Jiang}, title = {From Elements to Design: A Layered Approach for Automatic Graphic Design Composition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8128-8137} }
h-Edit: Effective and Flexible Diffusion-Based Editing via Doob's h-Transform-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Toan and Do, Kien and Kieu, Duc and Nguyen, Thin}, title = {h-Edit: Effective and Flexible Diffusion-Based Editing via Doob's h-Transform}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28490-28501} }
Masking meets Supervision: A Strong Learning Alliance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2025_CVPR, author = {Heo, Byeongho and Kim, Taekyung and Yun, Sangdoo and Han, Dongyoon}, title = {Masking meets Supervision: A Strong Learning Alliance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20447-20457} }
DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Wang and Cao, Yan-Pei and Xu, Jiale and Dong, Yuejiang and Shan, Ying}, title = {DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11061-11072} }
SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Junho and Kim, Hyunjun and Lee, Hosu and Ro, Yong Man}, title = {SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3352-3362} }
Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Wenlong and Wu, Qiaofeng and Chen, Jing and Xue, Yun}, title = {Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19597-19607} }
Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Jianyang and Zhao, Yitian and Meng, Yanda and Zhao, He and Nguyen, Anh and Zheng, Yalin}, title = {Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24309-24319} }
DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Li and Chen, Chen and Wang, Liqiang and Hua, Kien}, title = {DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4353-4363} }
Towards Lossless Implicit Neural Representation via Bit Plane Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Woo Kyoung and Lee, Byeonghun and Cho, Hyunmin and Im, Sunghoon and Jin, Kyong Hwan}, title = {Towards Lossless Implicit Neural Representation via Bit Plane Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2269-2278} }
Spectral State Space Model for Rotation-Invariant Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dastani_2025_CVPR, author = {Dastani, Sahar and Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Osowiechi, David and Hakim, Gustavo Adolfo Vargas and Beizaee, Farzad and Cheraghalikhani, Milad and Mondal, Arnab Kumar and Lombaert, Herve and Desrosiers, Christian}, title = {Spectral State Space Model for Rotation-Invariant Visual Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23881-23890} }
iSegMan: Interactive Segment-and-Manipulate 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yian and Xu, Wanshi and Zheng, Ruochong and Qiao, Pengchong and Liu, Chang and Chen, Jie}, title = {iSegMan: Interactive Segment-and-Manipulate 3D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {661-670} }
BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Xudong and Chen, Yinghao and Chen, Cheng and Tan, Hui and Chen, Boheng and Xie, Yina and Hu, Rui and Tan, Guanxin and Wu, Renshou and Hu, Yan and Zeng, Yi and Wu, Lei and Bian, Liuyang and Wang, Zhaoxiong and Liu, Long and Yang, Yanzhou and Xiao, Han and Zhou, Aojun and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng}, title = {BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4145-4155} }
Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Peirong and Aguila, Ana Lawry and Iglesias, Juan E.}, title = {Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10455-10465} }
Taming Teacher Forcing for Masked Autoregressive Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Deyu and Sun, Quan and Peng, Yuang and Yan, Kun and Dong, Runpei and Wang, Duomin and Ge, Zheng and Duan, Nan and Zhang, Xiangyu}, title = {Taming Teacher Forcing for Masked Autoregressive Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7374-7384} }
UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, I-Hsiang and Chen, Wei-Ting and Liu, Yu-Wei and Chiang, Yuan-Chun and Kuo, Sy-Yen and Yang, Ming-Hsuan}, title = {UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17969-17979} }
Sharp-It: A Multi-view to Multi-view Diffusion Model for 3D Synthesis and Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Edelstein_2025_CVPR, author = {Edelstein, Yiftach and Patashnik, Or and Cohen-Bar, Dana and Zelnik-Manor, Lihi}, title = {Sharp-It: A Multi-view to Multi-view Diffusion Model for 3D Synthesis and Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21458-21468} }
URWKV: Unified RWKV Model with Multi-state Perspective for Low-light Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Rui and Niu, Yuzhen and Li, Yuezhou and Xu, Huangbiao and Liu, Wenxi and Chen, Yuzhong}, title = {URWKV: Unified RWKV Model with Multi-state Perspective for Low-light Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21267-21276} }
Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Siyuan and Liang, Jiawei and Pang, Tianyu and Du, Chao and Liu, Aishan and Zhu, Mingli and Cao, Xiaochun and Tao, Dacheng}, title = {Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9477-9486} }
Condensing Action Segmentation Datasets via Generative Network Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Guodong and Chen, Rongyu and Yao, Angela}, title = {Condensing Action Segmentation Datasets via Generative Network Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17733-17742} }
TCFG: Tangential Damping Classifier-free Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_CVPR, author = {Kwon, Mingi and Kim, Shin seong and Jeong, Jaeseok and Hsiao, Yi Ting and Uh, Youngjung}, title = {TCFG: Tangential Damping Classifier-free Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2620-2629} }
MatAnyone: Stable Video Matting with Consistent Memory Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Peiqing and Zhou, Shangchen and Zhao, Jixin and Tao, Qingyi and Loy, Chen Change}, title = {MatAnyone: Stable Video Matting with Consistent Memory Propagation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7299-7308} }
Can Generative Video Models Help Pose Estimation?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Ruojin and Zhang, Jason Y. and Henzler, Philipp and Li, Zhengqi and Snavely, Noah and Martin-Brualla, Ricardo}, title = {Can Generative Video Models Help Pose Estimation?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16764-16773} }
Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deitke_2025_CVPR, author = {Deitke, Matt and Clark, Christopher and Lee, Sangho and Tripathi, Rohun and Yang, Yue and Park, Jae Sung and Salehi, Mohammadreza and Muennighoff, Niklas and Lo, Kyle and Soldaini, Luca and Lu, Jiasen and Anderson, Taira and Bransom, Erin and Ehsani, Kiana and Ngo, Huong and Chen, YenSung and Patel, Ajay and Yatskar, Mark and Callison-Burch, Chris and Head, Andrew and Hendrix, Rose and Bastani, Favyen and VanderBilt, Eli and Lambert, Nathan and Chou, Yvonne and Chheda, Arnavi and Sparks, Jenna and Skjonsberg, Sam and Schmitz, Michael and Sarnat, Aaron and Bischoff, Byron and Walsh, Pete and Newell, Chris and Wolters, Piper and Gupta, Tanmay and Zeng, Kuo-Hao and Borchardt, Jon and Groeneveld, Dirk and Nam, Crystal and Lebrecht, Sophie and Wittlif, Caitlin and Schoenick, Carissa and Michel, Oscar and Krishna, Ranjay and Weihs, Luca and Smith, Noah A. and Hajishirzi, Hannaneh and Girshick, Ross and Farhadi, Ali and Kembhavi, Aniruddha}, title = {Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {91-104} }
DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhenhua and Bai, Yan and Zhang, Yujia and Li, Zhuoling and Xia, Fei and Wong, Kwan-Yee K. and Wang, Jianqiang and Zhao, Hengshuang}, title = {DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17261-17270} }
High-Fidelity Lightweight Mesh Reconstruction from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chen and Wang, Wentao and Li, Ximeng and Liao, Xinyao and Su, Wanjuan and Tao, Wenbing}, title = {High-Fidelity Lightweight Mesh Reconstruction from Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11739-11748} }
MDP: Multidimensional Vision Model Pruning with Latency Constraint-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Xinglong and Lakshmanan, Barath and Shen, Maying and Lan, Shiyi and Chen, Jingde and Alvarez, Jose M.}, title = {MDP: Multidimensional Vision Model Pruning with Latency Constraint}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20113-20123} }
OSDFace: One-Step Diffusion Model for Face Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jingkai and Gong, Jue and Zhang, Lin and Chen, Zheng and Liu, Xing and Gu, Hong and Liu, Yutong and Zhang, Yulun and Yang, Xiaokang}, title = {OSDFace: One-Step Diffusion Model for Face Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12626-12636} }
Task Singular Vectors: Reducing Task Interference in Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gargiulo_2025_CVPR, author = {Gargiulo, Antonio Andrea and Crisostomi, Donato and Bucarelli, Maria Sofia and Scardapane, Simone and Silvestri, Fabrizio and Rodol\`a, Emanuele}, title = {Task Singular Vectors: Reducing Task Interference in Model Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18695-18705} }
Functionality Understanding and Segmentation in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Corsetti_2025_CVPR, author = {Corsetti, Jaime and Giuliari, Francesco and Fasoli, Alice and Boscaini, Davide and Poiesi, Fabio}, title = {Functionality Understanding and Segmentation in 3D Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24550-24559} }
Dragin3D: Image Editing by Dragging in 3D Space-
[pdf]
[supp]
[bibtex]@InProceedings{Guang_2025_CVPR, author = {Guang, Weiran and Gu, Xiaoguang and Huang, Mengqi and Mao, Zhendong}, title = {Dragin3D: Image Editing by Dragging in 3D Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21502-21512} }
MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Wenzhuo and Wang, Wenshuo and Qiao, Yicheng and Guo, Qiannan and Zhu, Jiayin and Li, Pengfei and Chen, Zilong and Yang, Huiming and Li, Zhiwei and Wang, Lening and Tan, Tiao and Liu, Huaping}, title = {MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6864-6874} }
T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui}, title = {T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8406-8416} }
Self-Evolving Visual Concept Library using Vision-Language Critics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sehgal_2025_CVPR, author = {Sehgal, Atharva and Yuan, Patrick and Hu, Ziniu and Yue, Yisong and Sun, Jennifer J. and Chaudhuri, Swarat}, title = {Self-Evolving Visual Concept Library using Vision-Language Critics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13124-13134} }
Multimodal Autoregressive Pre-training of Large Vision Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fini_2025_CVPR, author = {Fini, Enrico and Shukor, Mustafa and Li, Xiujun and Dufter, Philipp and Klein, Michal and Haldimann, David and Aitharaju, Sai and da Costa, Victor G. Turrisi and B\'ethune, Louis and Gan, Zhe and Toshev, Alexander and Eichner, Marcin and Nabi, Moin and Yang, Yinfei and Susskind, Joshua and El-Nouby, Alaaeldin}, title = {Multimodal Autoregressive Pre-training of Large Vision Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9641-9654} }
AKiRa: Augmentation Kit on Rays for Optical Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xi and Courant, Robin and Christie, Marc and Kalogeiton, Vicky}, title = {AKiRa: Augmentation Kit on Rays for Optical Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2609-2619} }
Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Wenliang and Tang, Haoyu and Zheng, Qinghai and Xu, Mingzhu and Hu, Yupeng and Guan, Weili}, title = {Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25581-25589} }
TSAM: Temporal SAM Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Radman_2025_CVPR, author = {Radman, Abduljalil and Laaksonen, Jorma}, title = {TSAM: Temporal SAM Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23947-23956} }
TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mushui and She, Dong and Pang, Jingxuan and Huang, Qihan and Ying, Jiacheng and He, Wanggui and Hou, Yuanlei and Fu, Siming}, title = {TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2714-2723} }
Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mengnan and Wang, Le and Zhou, Sanping and Xia, Kun and Sun, Xiaolong and Hua, Gang}, title = {Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13865-13875} }
SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Koley_2025_CVPR, author = {Koley, Subhadeep and Dutta, Tapas Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Song, Yi-Zhe}, title = {SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2556-2567} }
Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Haitao and Li, Qing and Zhang, Changqing and He, Zhen and Ying, Xiaomin}, title = {Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2246-2257} }
Invisible Backdoor Attack against Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hanrong and Wang, Zhenting and Li, Boheng and Lin, Fulin and Han, Tingxu and Jin, Mingyu and Zhan, Chenlu and Du, Mengnan and Wang, Hongwei and Ma, Shiqing}, title = {Invisible Backdoor Attack against Self-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25790-25801} }
Perceptually Accurate 3D Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chae-Yeon_2025_CVPR, author = {Lee, Chae-Yeon and Oh, Hyun-Bin and Han, EunGi and Kim, Sung-Bin and Nam, Suekyeong and Oh, Tae-Hyun}, title = {Perceptually Accurate 3D Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21065-21074} }
BWFormer: Building Wireframe Reconstruction from Airborne LiDAR Point Cloud with Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Huang, Shangfeng and Gao, Xiang and Zheng, Xianwei and Shen, Shuhan}, title = {BWFormer: Building Wireframe Reconstruction from Airborne LiDAR Point Cloud with Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22215-22224} }
Diffusion-4K: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinjin and Huang, Qiuyu and Liu, Junjie and Guo, Xiefan and Huang, Di}, title = {Diffusion-4K: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23464-23473} }
AffordDP: Generalizable Diffusion Policy with Transferable Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shijie and Zhu, Yihang and Huang, Yunao and Zhu, Kaizhen and Gu, Jiayuan and Yu, Jingyi and Shi, Ye and Wang, Jingya}, title = {AffordDP: Generalizable Diffusion Policy with Transferable Affordance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6971-6980} }
HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumbong_2025_CVPR, author = {Kumbong, Hermann and Liu, Xian and Lin, Tsung-Yi and Liu, Ming-Yu and Liu, Xihui and Liu, Ziwei and Fu, Daniel Y. and R{\'e}, Christopher and Romero, David W.}, title = {HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2535-2544} }
OmniDrive: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shihao and Yu, Zhiding and Jiang, Xiaohui and Lan, Shiyi and Shi, Min and Chang, Nadine and Kautz, Jan and Li, Ying and Alvarez, Jose M.}, title = {OmniDrive: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22442-22452} }
DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Zhenyu and Zhou, Jiahuan and Peng, Yuxin}, title = {DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3573-3582} }
Enhancing Facial Privacy Protection via Weakening Diffusion Purification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Salar_2025_CVPR, author = {Salar, Ali and Liu, Qing and Tian, Yingli and Zhao, Guoying}, title = {Enhancing Facial Privacy Protection via Weakening Diffusion Purification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8235-8244} }
ORIDa: Object-centric Real-world Image Composition Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jinwoo and Han, Sangmin and Jeong, Jinho and Choi, Jiwoo and Kim, Dongyeoung and Kim, Seon Joo}, title = {ORIDa: Object-centric Real-world Image Composition Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3051-3060} }
MeGA: Hybrid Mesh-Gaussian Head Avatar for High-Fidelity Rendering and Head Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Cong and Kang, Di and Sun, Heyi and Qian, Shenhan and Wang, Zixuan and Bao, Linchao and Zhang, Song-Hai}, title = {MeGA: Hybrid Mesh-Gaussian Head Avatar for High-Fidelity Rendering and Head Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26274-26284} }
Image Generation Diversity Issues and How to Tame Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dombrowski_2025_CVPR, author = {Dombrowski, Mischa and Zhang, Weitong and Cechnicka, Sarah and Reynaud, Hadrien and Kainz, Bernhard}, title = {Image Generation Diversity Issues and How to Tame Them}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3029-3039} }
Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumari_2025_CVPR, author = {Kumari, Suruchi and Singh, Pravendra}, title = {Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10404-10413} }
Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yi and Li, Wengen and Guan, Jihong and Zhou, Shuigeng and Zhang, Yichao}, title = {Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17851-17861} }
CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taubner_2025_CVPR, author = {Taubner, Felix and Zhang, Ruihang and Tuli, Mathieu and Lindell, David B.}, title = {CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5318-5330} }
Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Jung-Ho and Kim, Ho-Joong and Jeon, Kyu-Sung and Lee, Seong-Whan}, title = {Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25166-25175} }
OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Benquan and An, Ruyi and So, Jin-Kyu and Kurdiumov, Sergei and Chan, Eng Aik and Adamo, Giorgio and Peng, Yuhan and Li, Yewen and An, Bo}, title = {OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10900-10912} }
Dataset Distillation with Neural Characteristic Function: A Minmax Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shaobo and Yang, Yicun and Liu, Zhiyuan and Sun, Chenghao and Hu, Xuming and He, Conghui and Zhang, Linfeng}, title = {Dataset Distillation with Neural Characteristic Function: A Minmax Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25570-25580} }
Free-viewpoint Human Animation with Pose-correlated Reference Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Fa-Ting and Xu, Zhan and Liu, Haiyang and Lin, Qinjie and Song, Luchuan and Shu, Zhixin and Zhou, Yang and Ceylan, Duygu and Xu, Dan}, title = {Free-viewpoint Human Animation with Pose-correlated Reference Selection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26253-26262} }
CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yun and Zhang, Chengwen and Xing, Ruofan and Tang, Bingda and Yang, Bowen and Yi, Li}, title = {CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1769-1782} }
PillarHist: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Sifan and Yuan, Zhihang and Yang, Dawei and Hu, Xing and Qian, Jian and Zhao, Ziyu}, title = {PillarHist: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27336-27345} }
POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality-
[pdf]
[supp]
[bibtex]@InProceedings{Wilson_2025_CVPR, author = {Wilson, Joey and Almeida, Marcelino and Mahajan, Sachit and Labrie, Martin and Ghaffari, Maani and Ghasemalizadeh, Omid and Sun, Min and Kuo, Cheng-Hao and Sen, Arnab}, title = {POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3646-3655} }
Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yidi and Xiao, Jun and Lu, Zhengda and Wang, Yiqun and Jiang, Haiyong}, title = {Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18531-18540} }
Semantic and Expressive Variations in Image Captions Across Languages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Andre and Santy, Sebastin and Hwang, Jena D. and Zhang, Amy X. and Krishna, Ranjay}, title = {Semantic and Expressive Variations in Image Captions Across Languages}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29667-29679} }
ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Xubing and Gan, Yukang and Ge, Yixiao and Zhang, Xiao-Ping and Tang, Yansong}, title = {ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24972-24982} }
ADD: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2025_CVPR, author = {Mi, Ze-Yu and Yang, Yu-Bin}, title = {ADD: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23101-23110} }
DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Xin and Rawat, Yogesh S.}, title = {DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13980-13989} }
HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferens_2025_CVPR, author = {Ferens, Ron and Keller, Yosi}, title = {HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11547-11557} }
Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Di and Lei, Jingdi and Li, Junxian and Wang, Xunzhi and Liu, Yujie and Yang, Zonglin and Li, Jiatong and Wang, Weida and Yang, Suorong and Wu, Jianbo and Ye, Peng and Ouyang, Wanli and Zhou, Dongzhan}, title = {Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9050-9061} }
Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking-
[pdf]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Hongkai and Yang, Yang and Sun, Shijie and Feng, Mingtao and Song, Xiangyu and Lei, Qi and Hu, Hongli and Wang, Rong and Song, Huansheng and Akhtar, Naveed and Mian, Ajmal Saeed}, title = {Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13886-13896} }
Towards Universal Dataset Distillation via Task-Driven Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Ding and Li, Jian and Gao, Junyao and Dou, Shuguang and Tai, Ying and Hu, Jianlong and Zhao, Bo and Wang, Yabiao and Wang, Chengjie and Zhao, Cairong}, title = {Towards Universal Dataset Distillation via Task-Driven Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10557-10566} }
Parametric Point Cloud Completion for Polygonal Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhaiyu and Wang, Yuqing and Nan, Liangliang and Zhu, Xiao Xiang}, title = {Parametric Point Cloud Completion for Polygonal Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11749-11758} }
SyncSDE: A Probabilistic Framework for Diffusion Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Hyunjun and Lee, Hyunsoo and Han, Sookwan}, title = {SyncSDE: A Probabilistic Framework for Diffusion Synchronization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17508-17517} }
MaRI: Material Retrieval Integration across Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jianhui and Yang, Zhifei and He, Yangfan and Zhang, Huixiong and Chen, Yuxuan and Huang, Jingwei}, title = {MaRI: Material Retrieval Integration across Domains}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5814-5823} }
MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Mingcheng and Hou, Xiaolu and Liu, Ziyang and Yang, Dingkang and Qian, Ziyun and Chen, Jiawei and Wei, Jinjie and Jiang, Yue and Xu, Qingyao and Zhang, Lihua}, title = {MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13263-13272} }
Dual Semantic Guidance for Open Vocabulary Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhengyang and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Feng, Wei and Wan, Liang}, title = {Dual Semantic Guidance for Open Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20212-20222} }
CroCoDL: Cross-device Collaborative Dataset for Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Blum_2025_CVPR, author = {Blum, Hermann and Mercurio, Alessandro and O'Reilly, Joshua and Engelbracht, Tim and Dusmanu, Mihai and Pollefeys, Marc and Bauer, Zuria}, title = {CroCoDL: Cross-device Collaborative Dataset for Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27424-27434} }
Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zicheng and Jia, Ziheng and Wu, Haoning and Li, Chunyi and Chen, Zijian and Zhou, Yingjie and Sun, Wei and Liu, Xiaohong and Min, Xiongkuo and Lin, Weisi and Zhai, Guangtao}, title = {Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3229-3239} }
Glossy Object Reconstruction with Cost-effective Polarized Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bojian and Peng, Yifan and Hu, Ruizhen and Zhou, Xiaowei}, title = {Glossy Object Reconstruction with Cost-effective Polarized Acquisition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {422-431} }
Generalizable Object Keypoint Localization from Generative Priors-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Dongkai and Duan, Jiang and Wen, Liangjian and Xuan, Shiyu and Chen, Hao and Zhang, Shiliang}, title = {Generalizable Object Keypoint Localization from Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20265-20274} }
CLIP is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without OCR-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Xugong and Zhang, Peng and Yang, Jun Jie Ou and Zeng, Gangyan and Li, Yubo and Wang, Yuanyuan and Zhang, Wanqian and Dai, Pengwen}, title = {CLIP is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without OCR}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24873-24883} }
L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Casarin_2025_CVPR, author = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald}, title = {L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4441-4451} }
Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Huabin and Ilievski, Filip and Snoek, Cees G. M.}, title = {Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3262-3271} }
What Makes a Good Dataset for Knowledge Distillation?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Frank_2025_CVPR, author = {Frank, Logan and Davis, Jim}, title = {What Makes a Good Dataset for Knowledge Distillation?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23755-23764} }
Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Qizhou and Wang, Chengyu and Wang, Dakan and Zhang, Taolin and Li, Wangyue and He, Xiaofeng}, title = {Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9455-9466} }
Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification-
[pdf]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Rui and Yap, Kim-Hui and Liu, Weide and Yang, Xulei and Cheng, Jun}, title = {Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22348-22358} }
Shape and Texture: What Influences Reliable Optical Flow Estimation?-
[pdf]
[supp]
[bibtex]@InProceedings{Long_2025_CVPR, author = {Long, Libo and Hu, Xiao and Lang, Jochen}, title = {Shape and Texture: What Influences Reliable Optical Flow Estimation?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27894-27903} }
PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Minghao and Shapovalov, Roman and Laina, Iro and Monnier, Tom and Wang, Jianyuan and Novotny, David and Vedaldi, Andrea}, title = {PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5881-5892} }
FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Hao and Hu, Zhigang and Yang, Liu and Zheng, Meiguang and Xu, Aikun and Wang, Boyu}, title = {FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15444-15453} }
SINR: Sparsity Driven Compressed Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasundara_2025_CVPR, author = {Jayasundara, Dhananjaya and Rajagopalan, Sudarshan and Ranasinghe, Yasiru and Tran, Trac D. and Patel, Vishal M.}, title = {SINR: Sparsity Driven Compressed Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3061-3070} }
CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Zhiyu and Miao, Yunqi and Zhang, Zhensong and Song, Jifei and Deng, Jiankang and Song, Yi-Zhe}, title = {CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10815-10824} }
FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hengyu and Wang, Yuehao and Li, Chenxin and Cai, Ruisi and Wang, Kevin and Li, Wuyang and Molchanov, Pavlo and Wang, Peihao and Wang, Zhangyang}, title = {FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16336-16345} }
Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning-
[pdf]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Zhiyuan and Zhao, Yandan and Chen, Shen and Guo, Mingyi and Fu, Xinghe and Yao, Taiping and Ding, Shouhong and Wu, Yunsheng and Yuan, Li}, title = {Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12615-12625} }
ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Kailin and Li, Puhao and Liu, Tengyu and Li, Yuyang and Huang, Siyuan}, title = {ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6991-7003} }
Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuan and Li, Ouxiang and Mu, Tingting and Hao, Yanbin and Liu, Kuien and Wang, Xiang and He, Xiangnan}, title = {Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28759-28768} }
HOIGen-1M: A Large-scale Dataset for Human-Object Interaction Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kun and Liu, Qi and Liu, Xinchen and Li, Jie and Zhang, Yongdong and Luo, Jiebo and He, Xiaodong and Liu, Wu}, title = {HOIGen-1M: A Large-scale Dataset for Human-Object Interaction Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24001-24010} }
T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lijun and Shi, Zhelun and Hu, Xuhao and Dong, Bowen and Qin, Yiran and Liu, Xihui and Sheng, Lu and Shao, Jing}, title = {T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13381-13392} }
Order-One Rolling Shutter Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hahn_2025_CVPR, author = {Hahn, Marvin Anas and Kohn, Kathl{\'e}n and Marigliano, Orlando and Pajdla, Tomas}, title = {Order-One Rolling Shutter Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27007-27016} }
Animate and Sound an Image-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xihua and Song, Ruihua and Li, Chongxuan and Cheng, Xin and Li, Boyuan and Wu, Yihan and Wang, Yuyue and Xu, Hongteng and Wang, Yunfeng}, title = {Animate and Sound an Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23369-23378} }
Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Yingmao and Huang, Zhanpeng and Han, Rui and Wang, Zibin and Lin, Chenhao and Shen, Chao}, title = {Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {359-368} }
Foveated Instance Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Hongyi and Liu, Wenxuan and Xia, Tianhua and Chen, Jinhui and Li, Ziyun and Zhang, Sai Qian}, title = {Foveated Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24496-24505} }
Make It Count: Text-to-Image Generation with an Accurate Number of Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Binyamin_2025_CVPR, author = {Binyamin, Lital and Tewel, Yoad and Segev, Hilit and Hirsch, Eran and Rassin, Royi and Chechik, Gal}, title = {Make It Count: Text-to-Image Generation with an Accurate Number of Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13242-13251} }
Universal Domain Adaptation for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choe_2025_CVPR, author = {Choe, Seun-An and Park, Keon-Hee and Choi, Jinwoo and Park, Gyeong-Moon}, title = {Universal Domain Adaptation for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4607-4617} }
HyperGS: Hyperspectral 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thirgood_2025_CVPR, author = {Thirgood, Christopher and Mendez, Oscar and Ling, Erin and Storey, Jon and Hadfield, Simon}, title = {HyperGS: Hyperspectral 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5970-5979} }
Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Kai and Li, Zekai and Cheng, Zhi-Qi and Khaki, Samir and Sajedi, Ahmad and Vedantam, Ramakrishna and Plataniotis, Konstantinos N. and Hauptmann, Alexander and You, Yang}, title = {Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30451-30461} }
LMO: Linear Mamba Operator for MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wei and Jiang, Jiawei and Wu, Jie and Yu, Kaihao and Zheng, Jianwei}, title = {LMO: Linear Mamba Operator for MRI Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5112-5122} }
AnomalyNCD: Towards Novel Anomaly Class Discovery in Industrial Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Ziming and Li, Xurui and Liu, Haotian and Xue, Feng and Wang, Yuzhe and Zhou, Yu}, title = {AnomalyNCD: Towards Novel Anomaly Class Discovery in Industrial Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4755-4765} }
Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt_2025_CVPR, author = {Schmidt, Tanner and Newcombe, Richard}, title = {Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29428-29437} }
Task-Specific Gradient Adaptation for Few-Shot One-Class Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yunlong and Liu, Xiabi and Pan, Liyuan and Ren, Yuchen}, title = {Task-Specific Gradient Adaptation for Few-Shot One-Class Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30556-30565} }
TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Zhiying and Yang, Lei and Wen, Fuxi and Li, Jun}, title = {TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12048-12057} }
DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aiello_2025_CVPR, author = {Aiello, Emanuele and Michieli, Umberto and Valsesia, Diego and Ozay, Mete and Magli, Enrico}, title = {DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12480-12489} }
3D Gaussian Inpainting with Depth-Guided Cross-View Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Sheng-Yu and Chou, Zi-Ting and Wang, Yu-Chiang Frank}, title = {3D Gaussian Inpainting with Depth-Guided Cross-View Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26704-26713} }
Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Seil and Kim, Jinyeong and Kim, Junhyeok and Hwang, Seong Jae}, title = {Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9339-9350} }
FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhonghang and Zhou, Kun and Wang, Changshuo and Lin, Wen-Yan and Lu, Jiangbo}, title = {FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15183-15193} }
Ges3ViG: Incorporating Pointing Gestures into Language-Based 3D Visual Grounding for Embodied Reference Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mane_2025_CVPR, author = {Mane, Atharv Mahesh and Weerakoon, Dulanga and Subbaraju, Vigneshwaran and Sen, Sougata and Sarma, Sanjay E. and Misra, Archan}, title = {Ges3ViG: Incorporating Pointing Gestures into Language-Based 3D Visual Grounding for Embodied Reference Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9017-9026} }
Focusing on Tracks for Online Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Shim_2025_CVPR, author = {Shim, Kyujin and Ko, Kangwook and Yang, Yujin and Kim, Changick}, title = {Focusing on Tracks for Online Multi-Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11687-11696} }
Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoffmann_2025_CVPR, author = {Hoffmann, David T. and Raza, Syed Haseeb and Jiang, Hanqiu and Tananaev, Denis and Klingenhoefer, Steffen and Meinke, Martin}, title = {Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22328-22337} }
LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Joya and Zeng, Ziyun and Lin, Yiqi and Li, Wei and Ma, Zejun and Shou, Mike Zheng}, title = {LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29083-29095} }
Identity-preserving Distillation Sampling by Fixed-Point Iterator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, SeonHwa and Kim, Jiwon and Park, Soobin and Ahn, Donghoon and Kang, Jiwon and Kim, Seungryong and Jin, Kyong Hwan and Cha, Eunju}, title = {Identity-preserving Distillation Sampling by Fixed-Point Iterator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11115-11124} }
Progressive Focused Transformer for Single Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_CVPR, author = {Long, Wei and Zhou, Xingyu and Zhang, Leheng and Gu, Shuhang}, title = {Progressive Focused Transformer for Single Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2279-2288} }
VladVA: Discriminative Fine-tuning of LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouali_2025_CVPR, author = {Ouali, Yassine and Bulat, Adrian and Xenos, Alexandros and Zaganidis, Anestis and Metaxas, Ioannis Maniadis and Martinez, Brais and Tzimiropoulos, Georgios}, title = {VladVA: Discriminative Fine-tuning of LVLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4101-4111} }
FlexiDiT: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anagnostidis_2025_CVPR, author = {Anagnostidis, Sotiris and Bachmann, Gregor and Kim, Yeongmin and Kohler, Jonas and Georgopoulos, Markos and Sanakoyeu, Artsiom and Du, Yuming and Pumarola, Albert and Thabet, Ali and Sch{\"o}nfeld, Edgar}, title = {FlexiDiT: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28316-28326} }
WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Potamias_2025_CVPR, author = {Potamias, Rolandos Alexandros and Zhang, Jinglei and Deng, Jiankang and Zafeiriou, Stefanos}, title = {WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12242-12254} }
HumanMM: Global Human Motion Recovery from Multi-shot Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuhong and Wu, Guanlin and Chen, Ling-Hao and Zhao, Zhuokai and Lin, Jing and Jiang, Xiaoke and Wu, Jiamin and Li, Zhuoheng and Yang, Hao Frank and Wang, Haoqian and Zhang, Lei}, title = {HumanMM: Global Human Motion Recovery from Multi-shot Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1973-1983} }
Removing Reflections from RAW Photos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kee_2025_CVPR, author = {Kee, Eric and Pikielny, Adam and Blackburn-Matzen, Kevin and Levoy, Marc}, title = {Removing Reflections from RAW Photos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {161-171} }
BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koleilat_2025_CVPR, author = {Koleilat, Taha and Asgariandehkordi, Hojat and Rivaz, Hassan and Xiao, Yiming}, title = {BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14766-14776} }
AMR-Transformer: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zeyi and Liu, Jinfan and Chen, Kuangxu and Chen, Ye and Hu, Zhangli and Ni, Bingbing}, title = {AMR-Transformer: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5804-5813} }
MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Chharia_2025_CVPR, author = {Chharia, Aviral and Gou, Wenbo and Dong, Haoye}, title = {MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11590-11599} }
HyperGLM: HyperGraph for Video Scene Graph Generation and Anticipation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Trong-Thuan and Nguyen, Pha and Cothren, Jackson and Yilmaz, Alper and Luu, Khoa}, title = {HyperGLM: HyperGraph for Video Scene Graph Generation and Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29150-29160} }
AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Astruc_2025_CVPR, author = {Astruc, Guillaume and Gonthier, Nicolas and Mallet, Cl{\'e}ment and Landrieu, Loic}, title = {AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19530-19540} }
FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Gaojian and Lin, Feng and Wu, Tong and Liu, Zhenguang and Ba, Zhongjie and Ren, Kui}, title = {FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24364-24376} }
OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?-
[pdf]
[supp]
[bibtex]@InProceedings{Niu_2025_CVPR, author = {Niu, Junbo and Li, Yifei and Miao, Ziyang and Ge, Chunjiang and Zhou, Yuanhang and He, Qihao and Dong, Xiaoyi and Duan, Haodong and Ding, Shuangrui and Qian, Rui and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi}, title = {OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18902-18913} }
AlignMamba: Enhancing Multimodal Mamba with Local and Global Cross-modal Alignment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yan and Xing, Yifei and Lan, Xiangyuan and Li, Xin and Chen, Haifeng and Jiang, Dongmei}, title = {AlignMamba: Enhancing Multimodal Mamba with Local and Global Cross-modal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24774-24784} }
Blurry-Edges: Photon-Limited Depth Estimation from Defocused Boundaries-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Wei and Wagner, Charles James and Luo, Junjie and Guo, Qi}, title = {Blurry-Edges: Photon-Limited Depth Estimation from Defocused Boundaries}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {432-441} }
VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Dahun and Piergiovanni, AJ and Mallya, Ganesh and Angelova, Anelia}, title = {VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29060-29070} }
One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Chunyang and Xu, Tianyang and Feng, Zhenhua and Wu, Xiaojun and Tang, Zhangyong and Li, Hui and Zhang, Zeyang and Atito, Sara and Awais, Muhammad and Kittler, Josef}, title = {One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28102-28112} }
MICAS: Multi-grained In-Context Adaptive Sampling for 3D Point Cloud Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Feifei and Liu, Ping and Wang, Zhao and Luo, Yawei and Wang, Hongwei and Xiao, Jun}, title = {MICAS: Multi-grained In-Context Adaptive Sampling for 3D Point Cloud Processing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6616-6626} }
Can Text-to-Video Generation help Video-Language Alignment?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2025_CVPR, author = {Zanella, Luca and Mancini, Massimiliano and Menapace, Willi and Tulyakov, Sergey and Wang, Yiming and Ricci, Elisa}, title = {Can Text-to-Video Generation help Video-Language Alignment?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24097-24107} }
GoalFlow: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zebin and Zhang, Xingyu and Hu, Yang and Jiang, Bo and He, Tong and Zhang, Qian and Long, Xiaoxiao and Yin, Wei}, title = {GoalFlow: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1602-1611} }
GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zixuan and Wang, Guangcong and Zhu, Jiahao and Lai, Jianhuang and Xie, Xiaohua}, title = {GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16325-16335} }
Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Lilin and Wu, Chengpei and Yang, Ning}, title = {Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25718-25727} }
From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Chao and Zhang, Guiwei and Ma, Changxiao and Zhang, Tianyi and Niu, Guanglin}, title = {From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24409-24418} }
ColabSfM: Collaborative Structure-from-Motion by Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Edstedt_2025_CVPR, author = {Edstedt, Johan and Mateus, Andr{\'e} and Jaenal, Alberto}, title = {ColabSfM: Collaborative Structure-from-Motion by Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6573-6583} }
RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parikh_2025_CVPR, author = {Parikh, Chirag and Rawat, Deepti and T., Rakshitha R. and Ghosh, Tathagata and Sarvadevabhatla, Ravi Kiran}, title = {RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19002-19011} }
MangaNinja: Line Art Colorization with Precise Reference Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhiheng and Cheng, Ka Leong and Chen, Xi and Xiao, Jie and Ouyang, Hao and Zhu, Kai and Liu, Yu and Shen, Yujun and Chen, Qifeng and Luo, Ping}, title = {MangaNinja: Line Art Colorization with Precise Reference Following}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5666-5677} }
Nonisotropic Gaussian Diffusion for Realistic 3D Human Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Curreli_2025_CVPR, author = {Curreli, Cecilia and Muhle, Dominik and Saroha, Abhishek and Ye, Zhenzhang and Marin, Riccardo and Cremers, Daniel}, title = {Nonisotropic Gaussian Diffusion for Realistic 3D Human Motion Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1871-1882} }
Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yiping and He, Xuehai and Wang, Kuan and Ma, Luyao and Yang, Jianwei and Wang, Shuohang and Du, Simon Shaolei and Shen, Yelong}, title = {Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13629-13638} }
LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiaoyu and Pan, Weihong and Bao, Chong and Zhang, Xiyu and Xiang, Xiaojun and Jiang, Hanqing and Bao, Hujun}, title = {LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16122-16132} }
PICO: Reconstructing 3D People In Contact with Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cseke_2025_CVPR, author = {Cseke, Alp{\'a}r and Tripathi, Shashank and Dwivedi, Sai Kumar and Lakshmipathy, Arjun S. and Chatterjee, Agniv and Black, Michael J. and Tzionas, Dimitrios}, title = {PICO: Reconstructing 3D People In Contact with Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1783-1794} }
Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Bangyan and Zhao, Zhenjun and Li, Haoang and Zhou, Yi and Zeng, Yingping and Li, Hao and Liu, Peidong}, title = {Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15823-15832} }
Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yifei and Liu, Chang and Wei, Jin and Yang, Xiaomeng and Zhou, Yu and Ma, Can and Ji, Xiangyang}, title = {Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9318-9328} }
FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Fangyu and Chen, Yuhao}, title = {FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11051-11060} }
Scaling up Image Segmentation across Data and Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Swaminathan, Ashwin and Manmatha, R. and Soatto, Stefano}, title = {Scaling up Image Segmentation across Data and Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4573-4583} }
Take the Bull by the Horns: Learning to Segment Hard Samples-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuan and Kong, Jingyu and Wang, Yu and Duan, Yuping}, title = {Take the Bull by the Horns: Learning to Segment Hard Samples}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15642-15652} }
MIMO: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yanyuan and Xu, Dexuan and Huang, Yu and Zhan, Songkun and Wang, Hanpin and Chen, Dongxue and Wang, Xueping and Qiu, Meikang and Li, Hang}, title = {MIMO: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24732-24741} }
Bias for Action: Video Implicit Neural Representations with Bias Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kayabasi_2025_CVPR, author = {Kayabasi, Alper and Vadathya, Anil Kumar and Balakrishnan, Guha and Saragadam, Vishwanath}, title = {Bias for Action: Video Implicit Neural Representations with Bias Modulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27999-28008} }
Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bozhou and Song, Nan and Jin, Xin and Zhang, Li}, title = {Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6854-6863} }
Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Wensheng and Li, Zhenghong and Ren, Jiaxiang and Jeong, Hyomin and Du, Congwu and Pan, Yingtian and Ling, Haibin}, title = {Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10466-10475} }
DreamTrack: Dreaming the Future for Multimodal Visual Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Mingzhe and Tan, Weiping and Ran, Wenyu and Jing, Liping and Zhang, Zhipeng}, title = {DreamTrack: Dreaming the Future for Multimodal Visual Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7201-7210} }
OmniStyle: Filtering High Quality Style Transfer Data at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ye and Liu, Ruiqi and Lin, Jiang and Liu, Fei and Yi, Zili and Wang, Yilin and Ma, Rui}, title = {OmniStyle: Filtering High Quality Style Transfer Data at Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7847-7856} }
EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jagpal_2025_CVPR, author = {Jagpal, Diljeet and Chen, Xi and Namboodiri, Vinay P.}, title = {EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18219-18228} }
Cross-View Completion Models are Zero-shot Correspondence Estimators-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Honggyu and Kim, Jin Hyeon and Park, Seonghoon and Jung, Jaewoo and Han, Jisang and Hong, Sunghwan and Kim, Seungryong}, title = {Cross-View Completion Models are Zero-shot Correspondence Estimators}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1103-1115} }
Multi-party Collaborative Attention Control for Image Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Han and Yang, Chuanguang and Wang, Qiuli and An, Zhulin and Feng, Weilun and Huang, Libo and Xu, Yongjun}, title = {Multi-party Collaborative Attention Control for Image Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7942-7951} }
Reproducible Vision-Language Models Meet Concepts Out of Pre-Training-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ziliang and Huang, Xin and Fan, Xiaoxuan and Wang, Keze and Zhou, Yuyu and Guan, Quanlong and Lin, Liang}, title = {Reproducible Vision-Language Models Meet Concepts Out of Pre-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14701-14711} }
Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yariv_2025_CVPR, author = {Yariv, Guy and Kirstain, Yuval and Zohar, Amit and Sheynin, Shelly and Taigman, Yaniv and Adi, Yossi and Benaim, Sagie and Polyak, Adam}, title = {Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18198-18208} }
MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haoyuan and Wang, Zhenwei and Long, Xiaoxiao and Lin, Cheng and Hancke, Gerhard and Lau, Rynson W.H.}, title = {MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10985-10995} }
Segment Anything, Even Occluded-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tai_2025_CVPR, author = {Tai, Wei-En and Shih, Yu-Lin and Sun, Cheng and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong}, title = {Segment Anything, Even Occluded}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29385-29394} }
HOT3D: Hand and Object Tracking in 3D from Egocentric Multi-View Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Banerjee_2025_CVPR, author = {Banerjee, Prithviraj and Shkodrani, Sindi and Moulon, Pierre and Hampali, Shreyas and Han, Shangchen and Zhang, Fan and Zhang, Linguang and Fountain, Jade and Miller, Edward and Basol, Selen and Newcombe, Richard and Wang, Robert and Engel, Jakob Julian and Hodan, Tomas}, title = {HOT3D: Hand and Object Tracking in 3D from Egocentric Multi-View Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7061-7071} }
DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Zhiqiang and Sherif, Ammar and Yin, Zeyuan and Shao, Shitong}, title = {DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4797-4806} }
MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shaoming and Cai, Qing and Kong, Songqi and Tan, Runqing and Tong, Heng and Qiu, Shiji and Jiang, Yongguo and Liu, Zhi}, title = {MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16912-16921} }
RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Yuheng and Tan, Huajie and Shi, Jiayu and Hao, Xiaoshuai and Zhang, Yuan and Zhang, Hengyuan and Wang, Pengwei and Zhao, Mengdi and Mu, Yao and An, Pengju and Xue, Xinda and Su, Qinghang and Lyu, Huaihai and Zheng, Xiaolong and Liu, Jiaming and Wang, Zhongyuan and Zhang, Shanghang}, title = {RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1724-1734} }
Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xianrui and Cui, Yufei and Li, Jun and Chan, Antoni B.}, title = {Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20800-20809} }
Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Dongyao and Jing, Haodong and Ma, Yongqiang and Zheng, Nanning}, title = {Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9860-9869} }
ABBSPO: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Woojin and Chang, Hyugjae and Moon, Jaeho and Lee, Jaehyup and Kim, Munchurl}, title = {ABBSPO: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8848-8858} }
Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yu and Zheng, Dian and Mo, Qijie and Lu, Renjie and Lin, Kun-Yu and Zheng, Wei-Shi}, title = {Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20350-20359} }
TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions-
[pdf]
[supp]
[bibtex]@InProceedings{Yu-Hang_2025_CVPR, author = {Wang, Yu-Hang and Guo, Junkang and Liu, Aolei and Wang, Kaihao and Wu, Zaitong and Liu, Zhenyu and Yin, Wenfei and Liu, Jian}, title = {TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15476-15485} }
Few-shot Personalized Scanpath Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Ruoyu and Xu, Jingyi and Mondal, Sounak and Le, Hieu and Zelinsky, Greg and Hoai, Minh and Samaras, Dimitris}, title = {Few-shot Personalized Scanpath Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13497-13507} }
Do Your Best and Get Enough Rest for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Hankyul and Seifer, Gregor and Lee, Donghyun and Ryu, Jongbin}, title = {Do Your Best and Get Enough Rest for Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10077-10086} }
Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yiyang and Ding, Tianyu and Wang, Lei and Huo, Jing and Gao, Yang and Li, Wenbin}, title = {Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9881-9890} }
MUSt3R: Multi-view Network for Stereo 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cabon_2025_CVPR, author = {Cabon, Yohann and Stoffl, Lucas and Antsfeld, Leonid and Csurka, Gabriela and Chidlovskii, Boris and Revaud, Jerome and Leroy, Vincent}, title = {MUSt3R: Multi-view Network for Stereo 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1050-1060} }
Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhihang and Xie, Chen-Wei and Li, Pandeng and Zhao, Liming and Tang, Longxiang and Zheng, Yun and Liu, Chuanbin and Xie, Hongtao}, title = {Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8568-8578} }
Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiuming and Han, Jinru and Liu, Lihao and Aviles-Rivero, Angelica I. and Jiang, Chaokang and Liu, Zhe and Wang, Hesheng}, title = {Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17626-17636} }
Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Tan, Robby T.}, title = {Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14527-14537} }
Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Ting and Lin, Yi and Yu, Jun and Lou, Zhenyu and Cui, Qiongjie}, title = {Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12335-12346} }
LOGICZSL: Exploring Logic-induced Representation for Compositional Zero-shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Peng and Lu, Xiankai and Hu, Hao and Xian, Yongqin and Shen, Jianbing and Wang, Wenguan}, title = {LOGICZSL: Exploring Logic-induced Representation for Compositional Zero-shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30301-30311} }
ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Ling-An and Huang, Guohong and Wei, Yi-Lin and Gu, Shengbo and Tang, Yu-Ming and Meng, Jingke and Zheng, Wei-Shi}, title = {ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12358-12369} }
CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Pitawela_2025_CVPR, author = {Pitawela, Dileepa and Carneiro, Gustavo and Chen, Hsiang-Ting}, title = {CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15538-15548} }
Universal Actions for Enhanced Embodied Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Jinliang and Li, Jianxiong and Liu, Dongxiu and Zheng, Yinan and Wang, Zhihao and Ou, Zhonghong and Liu, Yu and Liu, Jingjing and Zhang, Ya-Qin and Zhan, Xianyuan}, title = {Universal Actions for Enhanced Embodied Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22508-22519} }
ObjectMover: Generative Object Movement with Video Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Xin and Wang, Tianyu and Kim, Soo Ye and Guerrero, Paul and Chen, Xi and Liu, Qing and Lin, Zhe and Qi, Xiaojuan}, title = {ObjectMover: Generative Object Movement with Video Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17682-17691} }
FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junyang and Pan, Jinshan and Dong, Jiangxin}, title = {FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28188-28197} }
MLLM-as-a-Judge for Image Safety without Human Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhenting and Hu, Shuming and Zhao, Shiyu and Lin, Xiaowen and Juefei-Xu, Felix and Li, Zhuowei and Han, Ligong and Subramanyam, Harihar and Chen, Li and Chen, Jianfa and Jiang, Nan and Lyu, Lingjuan and Ma, Shiqing and Metaxas, Dimitris N. and Jain, Ankit}, title = {MLLM-as-a-Judge for Image Safety without Human Labeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14657-14666} }
A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bodrito_2025_CVPR, author = {Bodrito, Th{\'e}o and Flasseur, Olivier and Mairal, Julien and Ponce, Jean and Langlois, Maud and Lagrange, Anne-Marie}, title = {A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1230-1240} }
Scene-agnostic Pose Regression for Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Junwei and Liu, Ruiping and Chen, Yufan and Chen, Zhenfang and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer}, title = {Scene-agnostic Pose Regression for Visual Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27092-27102} }
Learning to Filter Outlier Edges in Global SfM-
[pdf]
[supp]
[bibtex]@InProceedings{Damblon_2025_CVPR, author = {Damblon, Nicole and Pollefeys, Marc and Barath, Daniel}, title = {Learning to Filter Outlier Edges in Global SfM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11558-11568} }
Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR, author = {Pei, Gaozheng and Lyu, Shaojie and Chen, Gong and Ma, Ke and Xu, Qianqian and Sun, Yingfei and Huang, Qingming}, title = {Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29268-29277} }
SEC-Prompt: SEmantic Complementary Prompting for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ye and Yang, Meng}, title = {SEC-Prompt: SEmantic Complementary Prompting for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25643-25656} }
LiMoE: Mixture of LiDAR Representation Learners from Automotive Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Xiang and Kong, Lingdong and Shuai, Hui and Pan, Liang and Liu, Ziwei and Liu, Qingshan}, title = {LiMoE: Mixture of LiDAR Representation Learners from Automotive Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27368-27379} }
CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Qingqing and Lu, Yao and Kim, Moo Jin and Fu, Zipeng and Zhang, Zhuoyang and Wu, Yecheng and Li, Zhaoshuo and Ma, Qianli and Han, Song and Finn, Chelsea and Handa, Ankur and Lin, Tsung-Yi and Wetzstein, Gordon and Liu, Ming-Yu and Xiang, Donglai}, title = {CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1702-1713} }
WAVE: Weight Templates for Adaptive Initialization of Variable-sized Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Fu and Xie, Yucheng and Wang, Jing and Geng, Xin}, title = {WAVE: Weight Templates for Adaptive Initialization of Variable-sized Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4819-4828} }
Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection-
[pdf]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Xinjie and Li, Yuezun and Luo, Ao and Zhou, Jiaran and Dong, Junyu}, title = {Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19207-19217} }
KAC: Kolmogorov-Arnold Classifier for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Yusong and Liang, Zichen and Yang, Fei and Hou, Qibin and Liu, Xialei and Cheng, Ming-Ming}, title = {KAC: Kolmogorov-Arnold Classifier for Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15297-15307} }
PI-HMR: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Ziyu and Xiong, Yufan and Niu, Mengting and Xie, Fangting and Wan, Quan and Ying, Qijun and Liu, Boyan and Cai, Xiaohui}, title = {PI-HMR: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27739-27749} }
BOOTPLACE: Bootstrapped Object Placement with Detection Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Hang and Zuo, Xinxin and Ma, Rui and Cheng, Li}, title = {BOOTPLACE: Bootstrapped Object Placement with Detection Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19294-19303} }
CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation-
[pdf]
[bibtex]@InProceedings{Long_2025_CVPR, author = {Long, Yuxing and Zhang, Jiyao and Pan, Mingjie and Wu, Tianshu and Kim, Taewhan and Dong, Hao}, title = {CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22595-22604} }
CXPMRG-Bench: Pre-training and Benchmarking for X-ray Medical Report Generation on CheXpert Plus Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiao and Wang, Fuling and Li, Yuehang and Ma, Qingchuan and Wang, Shiao and Jiang, Bo and Tang, Jin}, title = {CXPMRG-Bench: Pre-training and Benchmarking for X-ray Medical Report Generation on CheXpert Plus Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5123-5133} }
FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR, author = {Dang, Chenxu and Duan, ZaiPeng and An, Pei and Zhang, Xinmin and Hu, Xuzhong and Ma, Jie}, title = {FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17029-17038} }
SEEN-DA: SEmantic ENtropy guided Domain-aware Attention for Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Song, Xinkai and Peng, Shaohui and Zhao, Yongwei and Zhao, Chen and Wu, Yanjun and Li, Ling}, title = {SEEN-DA: SEmantic ENtropy guided Domain-aware Attention for Domain Adaptive Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25465-25475} }
Event-Equalized Dense Video Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Kangyi and Li, Pengna and Fu, Jingwen and Li, Yizhe and Wu, Yang and Liu, Yuhan and Wang, Jinjun and Zhou, Sanping}, title = {Event-Equalized Dense Video Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8417-8427} }
Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ha_2025_CVPR, author = {Ha, Hyunho and Xiao, Lei and Richardt, Christian and Nguyen-Phuoc, Thu and Kim, Changil and Kim, Min H. and Lanman, Douglas and Khan, Numair}, title = {Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11275-11285} }
EDCFlow: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Daikun and Cheng, Lei and Wang, Teng and Sun, Changyin}, title = {EDCFlow: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1984-1993} }
Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances-
[pdf]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Yi and Ren, Botao and Zhang, Peiyuan and Liu, Mingxin and Luo, Junwei and Zhang, Shaofeng and Da, Feipeng and Yan, Junchi and Yang, Xue}, title = {Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19283-19293} }
LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehri_2025_CVPR, author = {Mehri, Faridoun and Baghshah, Mahdieh Soleymani and Pilehvar, Mohammad Taher}, title = {LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {67-78} }
Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shuyun and Zhang, Hu and Shen, Xin and Wang, Dadong and Yu, Xin}, title = {Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22975-22984} }
Mind the Trojan Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junxi and Dong, Junhao and Xie, Xiaohua}, title = {Mind the Trojan Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23785-23794} }
Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Youngjoon and Raajesh, Haran and Momeni, Liliane and Varol, G{\"u}l and Zisserman, Andrew}, title = {Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8742-8752} }
CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jungho and Cho, Suhwan and Kim, Taeoh and Jang, Ho-Deok and Lee, Minhyeok and Cha, Geonho and Wee, Dongyoon and Lee, Dogyoon and Lee, Sangyoun}, title = {CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16101-16110} }
Semantic and Sequential Alignment for Referring Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Feiyu and Fang, Hao and Li, Fangkai and Xu, Yanyu and Li, Yawei and Benini, Luca and Lu, Xiankai}, title = {Semantic and Sequential Alignment for Referring Video Object Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19067-19076} }
Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Juncheng and Xu, Chao and Yu, Cheng and Shang, Lei and Hu, Zhe and Wang, Shujun and Bo, Liefeng}, title = {Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3111-3120} }
Continual SFT Matches Multimodal RLHF with Negative Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Ke and Wang, Yu and Sun, Yanpeng and Chen, Qiang and Liu, Jiangjiang and Zhang, Gang and Wang, Jingdong}, title = {Continual SFT Matches Multimodal RLHF with Negative Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14615-14624} }
Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Anqi and Zhu, Jingmin and Bailey, James and Gong, Mingming and Ke, Qiuhong}, title = {Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13876-13885} }
FATE: Full-head Gaussian Avatar with Textural Editing from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiawei and Wu, Zijian and Liang, Zhiyang and Gong, Yicheng and Hu, Dongfang and Yao, Yao and Cao, Xun and Zhu, Hao}, title = {FATE: Full-head Gaussian Avatar with Textural Editing from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5535-5545} }
ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Chengyou and Xia, Changliang and Dang, Zhuohang and Wu, Weijia and Qian, Hangwei and Luo, Minnan}, title = {ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13284-13293} }
GEM: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassan_2025_CVPR, author = {Hassan, Mariam and Stapf, Sebastian and Rahimi, Ahmad and Rezende, Pedro M B and Haghighi, Yasaman and Br{\"u}ggemann, David and Katircioglu, Isinsu and Zhang, Lin and Chen, Xiaoran and Saha, Suman and Cannici, Marco and Aljalbout, Elie and Ye, Botao and Wang, Xi and Davtyan, Aram and Salzmann, Mathieu and Scaramuzza, Davide and Pollefeys, Marc and Favaro, Paolo and Alahi, Alexandre}, title = {GEM: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22404-22415} }
VEU-Bench: Towards Comprehensive Understanding of Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Bozheng and Wu, Yongliang and Lu, Yi and Yu, Jiashuo and Tang, Licheng and Cao, Jiawang and Zhu, Wenqing and Sun, Yuyang and Wu, Jay and Zhu, Wenbo}, title = {VEU-Bench: Towards Comprehensive Understanding of Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13671-13680} }
Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jinchang and Wang, Shaokang and Chen, Jintao and Li, Zhe and Jia, Peidong and Zhao, Fei and Xiang, Guoqing and Hao, Zhijian and Zhang, Shanghang and Xie, Xiaodong}, title = {Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18051-18061} }
Yo'Chameleon: Personalized Vision and Language Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Thao and Singh, Krishna Kumar and Shi, Jing and Bui, Trung and Lee, Yong Jae and Li, Yuheng}, title = {Yo'Chameleon: Personalized Vision and Language Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14438-14448} }
PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Shian and Xia, Menghan and Liu, Chang and Wang, Xintao and Wang, Jing and Wan, Pengfei and Zhang, Di and Ji, Xiangyang}, title = {PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17799-17809} }
FluxSpace: Disentangled Semantic Editing in Rectified Flow Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dalva_2025_CVPR, author = {Dalva, Yusuf and Venkatesh, Kavana and Yanardag, Pinar}, title = {FluxSpace: Disentangled Semantic Editing in Rectified Flow Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13083-13092} }
Scene-Centric Unsupervised Panoptic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hahn_2025_CVPR, author = {Hahn, Oliver and Reich, Christoph and Araslanov, Nikita and Cremers, Daniel and Rupprecht, Christian and Roth, Stefan}, title = {Scene-Centric Unsupervised Panoptic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24485-24495} }
Touch2Shape: Touch-Conditioned 3D Diffusion for Shape Exploration and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuanbo and Zhang, Zhaoxuan and Qiu, Jiajin and Sun, Dilong and Meng, Zhengyu and Wei, Xiaopeng and Yang, Xin}, title = {Touch2Shape: Touch-Conditioned 3D Diffusion for Shape Exploration and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5656-5665} }
VITED: Video Temporal Evidence Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yujie and Song, Yale and Wang, William and Torresani, Lorenzo and Nagarajan, Tushar}, title = {VITED: Video Temporal Evidence Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8501-8511} }
Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhipeng and Cheng, De and Jiang, Xinyang and Wang, Nannan and Li, Dongsheng and Gao, Xinbo}, title = {Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18584-18595} }
Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia_2025_CVPR, author = {Garcia, Alejandro Casta{\~n}eda and Warchocki, Jan and van Gemert, Jan and Brinks, Daan and Tomen, Nergis}, title = {Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27924-27933} }
Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Yu and Zhao, Zengqun and Patras, Ioannis and Gong, Shaogang}, title = {Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7707-7716} }
ProAPO: Progressively Automatic Prompt Optimization for Visual Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Xiangyan and Gou, Gaopeng and Zhuang, Jiamin and Yu, Jing and Song, Kun and Wang, Qihao and Li, Yili and Xiong, Gang}, title = {ProAPO: Progressively Automatic Prompt Optimization for Visual Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25145-25155} }
ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Petrov_2025_CVPR, author = {Petrov, Dmitry and Goyal, Pradyumn and Shivashok, Divyansh and Tao, Yuanming and Averkiou, Melinos and Kalogerakis, Evangelos}, title = {ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13305-13314} }
Auto-Encoded Supervision for Perceptual Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, MinKyu and Hyun, Sangeek and Jun, Woojin and Heo, Jae-Pil}, title = {Auto-Encoded Supervision for Perceptual Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17958-17968} }
Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chinchure_2025_CVPR, author = {Chinchure, Aditya and Ravi, Sahithya and Ng, Raymond and Shwartz, Vered and Li, Boyang and Sigal, Leonid}, title = {Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24201-24210} }
Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Burgert_2025_CVPR, author = {Burgert, Ryan and Xu, Yuancheng and Xian, Wenqi and Pilarski, Oliver and Clausen, Pascal and He, Mingming and Ma, Li and Deng, Yitong and Li, Lingxiao and Mousavi, Mohsen and Ryoo, Michael and Debevec, Paul and Yu, Ning}, title = {Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13-23} }
Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2025_CVPR, author = {Gan, Yuan and Miao, Jiaxu and Wang, Yunze and Yang, Yi}, title = {Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13434-13444} }
Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Jiayi and Liu, Siyu and Liu, Zikun and Guo, Chun-Le and Park, Hyunhee and Wu, Ruiqi and Wang, Guoqing and Li, Chongyi}, title = {Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12700-12709} }
RNG: Relightable Neural Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Jiahui and Luan, Fujun and Yang, Jian and Hasan, Milos and Wang, Beibei}, title = {RNG: Relightable Neural Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26525-26534} }
Towards Realistic Example-based Modeling via 3D Gaussian Stitching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Xinyu and Yang, Ziyi and Gong, Bingchen and Han, Xiaoguang and Yang, Sipeng and Jin, Xiaogang}, title = {Towards Realistic Example-based Modeling via 3D Gaussian Stitching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26597-26607} }
Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Safaei_2025_CVPR, author = {Safaei, Bardia and Siddiqui, Faizan and Xu, Jiacong and Patel, Vishal M. and Lo, Shao-Yuan}, title = {Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14247-14256} }
Gradient-Guided Annealing for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ballas_2025_CVPR, author = {Ballas, Aristotelis and Diou, Christos}, title = {Gradient-Guided Annealing for Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20558-20568} }
Generative Sparse-View Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao}, title = {Generative Sparse-View Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26745-26755} }
MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research-
[pdf]
[supp]
[bibtex]@InProceedings{Burgess_2025_CVPR, author = {Burgess, James and Nirschl, Jeffrey J and Bravo-S{\'a}nchez, Laura and Lozano, Alejandro and Gupte, Sanket Rajan and Galaz-Montoya, Jesus G. and Zhang, Yuhui and Su, Yuchang and Bhowmik, Disha and Coman, Zachary and Hasan, Sarina M and Johannesson, Alexandra and Leineweber, William D. and Nair, Malvika G and Yarlagadda, Ridhi and Zuraski, Connor and Chiu, Wah and Cohen, Sarah and Hansen, Jan N. and Leonetti, Manuel D and Liu, Chad and Lundberg, Emma and Yeung-Levy, Serena}, title = {MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19552-19564} }
Generative Inbetweening through Frame-wise Conditions-Driven Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Tianyi and Ren, Dongwei and Wang, Qilong and Wu, Xiaohe and Zuo, Wangmeng}, title = {Generative Inbetweening through Frame-wise Conditions-Driven Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27968-27978} }
DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Yiming and Jiang, Qi and Yu, Jingyi and Ma, Yuexin}, title = {DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22584-22594} }
CustAny: Customizing Anything from A Single Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Lingjie and Wu, Kai and Xu, Chengming and Hu, Xiaobin and Han, Wenhui and Peng, Jinlong and Luo, Donghao and Li, Mengtian and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei}, title = {CustAny: Customizing Anything from A Single Example}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20916-20925} }
Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Haijin and Wang, Xiangming and Chen, Yongyong and Su, Jingyong and Liu, Jie}, title = {Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7524-7533} }
3D-LLaVA: Towards Generalist 3D LMMs with Omni Superpoint Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Jiajun and He, Tianyu and Jiang, Li and Wang, Tianyu and Dayoub, Feras and Reid, Ian}, title = {3D-LLaVA: Towards Generalist 3D LMMs with Omni Superpoint Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3772-3782} }
Event-based Video Super-Resolution via State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Zeyu and Wang, Xinchao}, title = {Event-based Video Super-Resolution via State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12564-12574} }
PoseTraj: Pose-Aware Trajectory Control in Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Longbin and Zhong, Lei and Wei, Pengfei and Li, Changjian}, title = {PoseTraj: Pose-Aware Trajectory Control in Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22776-22785} }
Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hermosilla_2025_CVPR, author = {Hermosilla, Pedro and Stippel, Christian and Sick, Leon}, title = {Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14835-14844} }
VL2Lite: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks-
[pdf]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Jinseong and Ma, Chunfei and Lee, Byeongwon}, title = {VL2Lite: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30073-30083} }
Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuxin and Zhu, Zihao and Zhang, Yuxiang and Chen, Yifan and Yu, Zhibin}, title = {Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10394-10403} }
VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chaoyu and Im, Eun Woo and Fazli, Pooyan}, title = {VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13723-13733} }
StageDesigner: Artistic Stage Generation for Scenography via Theater Scripts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gan_2025_CVPR, author = {Gan, Zhaoxing and Li, Mengtian and Chen, Ruhua and Ji, Zhongxia and Guo, Sichen and Hu, Huanling and Ye, Guangnan and Hu, Zuo}, title = {StageDesigner: Artistic Stage Generation for Scenography via Theater Scripts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28705-28714} }
From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yan and Yu, Hao and Cheng, Xu and Chen, Haoyu and Sun, Zhaodong and Zhao, Guoying}, title = {From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8828-8837} }
4Deform: Neural Surface Deformation for Robust Shape Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sang_2025_CVPR, author = {Sang, Lu and Canfes, Zehranaz and Cao, Dongliang and Marin, Riccardo and Bernard, Florian and Cremers, Daniel}, title = {4Deform: Neural Surface Deformation for Robust Shape Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6542-6551} }
Dense Match Summarization for Faster Two-view Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Astermark_2025_CVPR, author = {Astermark, Jonathan and Heyden, Anders and Larsson, Viktor}, title = {Dense Match Summarization for Faster Two-view Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1093-1102} }
Align-A-Video: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengzhi and Zhong, Yingkang and Mu, Jiangchuan and Wu, Kai and Xiong, Mingliang and Fang, Wen and Liu, Mingqing and Deng, Hao and He, Bin and Li, Gang and Liu, Qingwen},
  title     = {{Align-A-Video}: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2074--2083},
}
Interpreting Object-level Foundation Models via Visual Precision Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ruoyu and Liang, Siyuan and Li, Jingzhi and Liu, Shiming and Li, Maosen and Huang, Zhen and Zhang, Hua and Cao, Xiaochun},
  title     = {Interpreting Object-level Foundation Models via Visual Precision Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30042--30052},
}
Foley-Flow: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows-
[pdf]
[bibtex]@InProceedings{Mo_2025_CVPR,
  author    = {Mo, Shentong and Song, Yibing},
  title     = {{Foley-Flow}: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28912--28921},
}
LION-FS: Fast & Slow Video-Language Thinker as Online Video Assistant-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Wei and Hu, Bing and Shao, Rui and Shen, Leyang and Nie, Liqiang},
  title     = {{LION-FS}: Fast \& Slow Video-Language Thinker as Online Video Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3240--3251},
}
CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yuan and Xu, Qingshan and Cui, Jiequan and Zhou, Junbao and Zhang, Jing and Hong, Richang and Zhang, Hanwang},
  title     = {{CARE} Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20135--20145},
}
Paint by Inpaint: Learning to Add Image Objects by Removing Them First-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wasserman_2025_CVPR,
  author    = {Wasserman, Navve and Rotstein, Noam and Ganz, Roy and Kimmel, Ron},
  title     = {Paint by Inpaint: Learning to Add Image Objects by Removing Them First},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18313--18324},
}
Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Andong and Chen, Tongjia and Yu, Shoubin and Yang, Taojiannan and Spencer, Lincoln and Tian, Yapeng and Mian, Ajmal Saeed and Bansal, Mohit and Chen, Chen},
  title     = {Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8625--8636},
}
PMA: Towards Parameter-Efficient Point Cloud Understanding via Point Mamba Adapter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Yaohua and Wang, Yanzi and Guo, Hang and Wang, Jinpeng and Dai, Tao and Chen, Bin and Ouyang, Zhihao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao},
  title     = {{PMA}: Towards Parameter-Efficient Point Cloud Understanding via Point {Mamba} Adapter},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16976--16986},
}
All-directional Disparity Estimation for Real-world QPD Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hongtao and Song, Shaohui and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming},
  title     = {All-directional Disparity Estimation for Real-world {QPD} Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21836--21846},
}
LC-Mamba: Local and Continuous Mamba with Shifted Windows for Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Min Wu and Rhee, Chae Eun},
  title     = {{LC-Mamba}: Local and Continuous {Mamba} with Shifted Windows for Frame Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17671--17681},
}
Zero-Shot Head Swapping in Real-World Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Taewoong and Jeong, Sohyun and Jang, Hyojin and Choo, Jaegul},
  title     = {Zero-Shot Head Swapping in Real-World Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10805--10814},
}
Toward Robust Neural Reconstruction from Sparse Point Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouasfi_2025_CVPR,
  author    = {Ouasfi, Amine and Jena, Shubhendu and Marchand, Eric and Boukhayma, Adnane},
  title     = {Toward Robust Neural Reconstruction from Sparse Point Sets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6552--6562},
}
GPAvatar: High-fidelity Head Avatars by Learning Efficient Gaussian Projections-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Wei-Qi and Han, Dong and Zhou, Ze-Kang and Li, Shunkai and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Wang, Miao},
  title     = {{GPAvatar}: High-fidelity Head Avatars by Learning Efficient {Gaussian} Projections},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {250--259},
}
PIAD: Pose and Illumination agnostic Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Kaichen and Cao, Junjie and Bai, Zeyu and Su, Zhixun and Tagliasacchi, Andrea},
  title     = {{PIAD}: Pose and Illumination agnostic Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4734--4743},
}
CAV-MAE Sync: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Araujo_2025_CVPR,
  author    = {Araujo, Edson and Rouditchenko, Andrew and Gong, Yuan and Bhati, Saurabhchand and Thomas, Samuel and Kingsbury, Brian and Karlinsky, Leonid and Feris, Rogerio and Glass, James R. and Kuehne, Hilde},
  title     = {{CAV-MAE} {Sync}: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18794--18803},
}
Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Yoojin and Song, Byung Cheol},
  title     = {Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9696--9706},
}
Tiled Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Madar_2025_CVPR,
  author    = {Madar, Or and Fried, Ohad},
  title     = {Tiled Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7795--7804},
}
Using Diffusion Priors for Video Amodal Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kaihua and Ramanan, Deva and Khurana, Tarasha},
  title     = {Using Diffusion Priors for Video Amodal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22890--22900},
}
COBRA: COmBinatorial Retrieval Augmentation for Few-Shot Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2025_CVPR,
  author    = {Das, Arnav M. and Bhatt, Gantavya and Kumari, Lilly and Verma, Sahil and Bilmes, Jeff},
  title     = {{COBRA}: {COmBinatorial} Retrieval Augmentation for Few-Shot Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20534--20546},
}
Dyn-HaMR: Recovering 4D Interacting Hand Motion from a Dynamic Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhengdi and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {{Dyn-HaMR}: Recovering {4D} Interacting Hand Motion from a Dynamic Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27716--27726},
}
The Scene Language: Representing Scenes with Programs, Words, and Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yunzhi and Li, Zizhang and Zhou, Matt and Wu, Shangzhe and Wu, Jiajun},
  title     = {The Scene Language: Representing Scenes with Programs, Words, and Embeddings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24625--24634},
}
ProbeSDF: Light Field Probes For Neural Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Toussaint_2025_CVPR,
  author    = {Toussaint, Briac and Thomas, Diego and Franco, Jean-S{\'e}bastien},
  title     = {{ProbeSDF}: Light Field Probes For Neural Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11026--11035},
}
Descriptor-In-Pixel : Point-Feature Tracking For Pixel Processor Arrays-
[pdf]
[supp]
[bibtex]@InProceedings{Bose_2025_CVPR,
  author    = {Bose, Laurie and Chen, Jianing and Dudek, Piotr},
  title     = {{Descriptor-In-Pixel} : Point-Feature Tracking For Pixel Processor Arrays},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5392--5400},
}
Hybrid Concept Bottleneck Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yang and Zhang, Tianwei and Gu, Shi},
  title     = {Hybrid Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20179--20189},
}
UVGS: Reimagining Unstructured 3D Gaussian Splatting using UV Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rai_2025_CVPR,
  author    = {Rai, Aashish and Wang, Dilin and Jain, Mihir and Sarafianos, Nikolaos and Chen, Kefan and Sridhar, Srinath and Prakash, Aayush},
  title     = {{UVGS}: Reimagining Unstructured {3D} {Gaussian} Splatting using {UV} Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5927--5937},
}
Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Da-Wei and Cai, Zi-Wen and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan},
  title     = {Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20547--20557},
}
Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yitang and Lin, Mingxian and Lin, Zhuo and Deng, Yipeng and Cao, Yue and Yi, Li},
  title     = {Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27673--27682},
}
EmoEdit: Evoking Emotions through Image Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Luo, Weibin and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {{EmoEdit}: Evoking Emotions through Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24690--24699},
}
RORem: Training a Robust Object Remover with Human-in-the-Loop-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruibin and Yang, Tao and Guo, Song and Zhang, Lei},
  title     = {{RORem}: Training a Robust Object Remover with Human-in-the-Loop},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14024--14035},
}
All Languages Matter: Evaluating LMMs on Culturally Diverse 100 Languages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vayani_2025_CVPR,
  author    = {Vayani, Ashmal and Dissanayake, Dinura and Watawana, Hasindri and Ahsan, Noor and Sasikumar, Nevasini and Thawakar, Omkar and Ademtew, Henok Biadglign and Hmaiti, Yahya and Kumar, Amandeep and Kukreja, Kartik and Maslych, Mykola and Al Ghallabi, Wafa and Mihaylov, Mihail Minkov and Qin, Chao and Shaker, Abdelrahman M. and Zhang, Mike and Ihsani, Mahardika Krisna and Esplana, Amiel Gian and Gokani, Monil and Mirkin, Shachar and Singh, Harsh and Srivastava, Ashay and Hamerlik, Endre and Izzati, Fathinah Asma and Maani, Fadillah Adamsyah and Cavada, Sebastian and Chim, Jenny and Gupta, Rohit and Manjunath, Sanjay and Zhumakhanova, Kamila and Rabevohitra, Feno Heriniaina and Amirudin, Azril Hafizi and Ridzuan, Muhammad and Kareem, Daniya Najiha Abdul and More, Ketan Pravin and Li, Kunyang and Shakya, Pramesh and Saad, Muhammad and Ghasemaghaei, Amirpouya and Djanibekov, Amirbek and Azizov, Dilshod and Jankovic, Branislava and Bhatia, Naman and Cabrera, Alvaro and Obando-Ceron, Johan and Otieno, Olympiah and Farestam, Febian and Rabbani, Muztoba and Ballah, Sanoojan and Sanjeev, Santosh and Shtanchaev, Abduragim and Fatima, Maheen and Nguyen, Thao and Kareem, Amrin and Aremu, Toluwani and Xavier, Nathan Augusto Zacarias and Bhatkal, Amit and Toyin, Hawau Olamide and Chadha, Aman and Cholakkal, Hisham and Anwer, Rao Muhammad and Felsberg, Michael and Laaksonen, Jorma and Solorio, Thamar and Choudhury, Monojit and Laptev, Ivan and Shah, Mubarak and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {All Languages Matter: Evaluating {LMMs} on Culturally Diverse 100 Languages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19565--19575},
}
SparseAlign: a Fully Sparse Framework for Cooperative Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yunshuang and Xia, Yan and Cremers, Daniel and Sester, Monika},
  title     = {{SparseAlign}: a Fully Sparse Framework for Cooperative Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22296--22305},
}
Video-Bench: Human-Aligned Video Generation Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_CVPR,
  author    = {Han, Hui and Li, Siyuan and Chen, Jiaqi and Yuan, Yiwen and Wu, Yuling and Deng, Yufan and Leong, Chak Tou and Du, Hanwen and Fu, Junchen and Li, Youhua and Zhang, Jie and Zhang, Chi and Li, Li-jia and Ni, Yongxin},
  title     = {{Video-Bench}: Human-Aligned Video Generation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18858--18868},
}
Data Distributional Properties As Inductive Bias for Systematic Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{del_Rio_2025_CVPR,
  author    = {del Rio, Felipe and Raymond-Saez, Alain and Florea, Daniel and Icarte, Rodrigo Toro and Hurtado, Julio and Calderon, Cristian B. and Soto, Alvaro},
  title     = {Data Distributional Properties As Inductive Bias for Systematic Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25590--25601},
}
MergeVQ: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Siyuan and Zhang, Luyuan and Wang, Zedong and Tian, Juanxi and Tan, Cheng and Liu, Zicheng and Yu, Chang and Xie, Qingsong and Lu, Haonan and Wang, Haoqian and Lei, Zhen},
  title     = {{MergeVQ}: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19713--19723},
}
InterAct: Advancing Large-Scale Versatile 3D Human-Object Interaction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Li, Dongting and Zhang, Yucheng and Xu, Xiyan and Long, Qi and Wang, Ziyin and Lu, Yunzhi and Dong, Shuchang and Jiang, Hezi and Gupta, Akshat and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterAct}: Advancing Large-Scale Versatile {3D} Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7048--7060},
}
TopoCellGen: Generating Histopathology Cell Topology with a Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Meilong and Gupta, Saumya and Hu, Xiaoling and Li, Chen and Abousamra, Shahira and Samaras, Dimitris and Prasanna, Prateek and Chen, Chao},
  title     = {{TopoCellGen}: Generating Histopathology Cell Topology with a Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20979--20989},
}
Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiaming and Ye, Junhong and Ma, Xingjun and Li, Yige and Yang, Yunfan and Chen, Yunhao and Sang, Jitao and Yeung, Dit-Yan},
  title     = {Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19900--19909},
}
Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Hoang Chuong and Mao, Wei and Alvarez, Jose M. and Liu, Miaomiao},
  title     = {Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11472--11481},
}
IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Chun and Wei, Xiaofei and Zeng, Zixuan and Yao, Yuxuan and Zhang, Li},
  title     = {{IRGS}: Inter-Reflective {Gaussian} Splatting with {2D} {Gaussian} Ray Tracing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10943--10952},
}
InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Ling, Hung Yu and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterMimic}: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12266--12277},
}
Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zichen and Liu, Yaoyao and Sun, Qianru},
  title     = {Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23037--23047},
}
TriTex: Learning Texture from a Single Mesh via Triplane Semantic Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cohen-Bar_2025_CVPR,
  author    = {Cohen-Bar, Dana and Cohen-Or, Daniel and Chechik, Gal and Kasten, Yoni},
  title     = {{TriTex}: Learning Texture from a Single Mesh via Triplane Semantic Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21403--21413},
}
Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kunyu and Fu, Xueyang and Lu, Xin and Ge, Chengjie and Cao, Chengzhi and Zhai, Wei and Zha, Zheng-Jun},
  title     = {Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10577--10586},
}
A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Xin and Zhao, Bingchen and Chen, Yilun and Pang, Jiangmiao and Qi, Xiaojuan},
  title     = {A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12143--12154},
}
Visual Agentic AI for Spatial Reasoning with a Dynamic API-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marsili_2025_CVPR,
  author    = {Marsili, Damiano and Agrawal, Rohun and Yue, Yisong and Gkioxari, Georgia},
  title     = {Visual Agentic {AI} for Spatial Reasoning with a Dynamic {API}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19446--19455},
}
TAMT: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yilong and Gao, Zilin and Wang, Qilong and Chen, Zhaofeng and Li, Peihua and Hu, Qinghua},
  title     = {{TAMT}: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3449--3459},
}
Feature Spectrum Learning for Remote Sensing Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2025_CVPR,
  author    = {Zang, Qi and Zhao, Dong and Wang, Shuang and Quan, Dou and Zhong, Zhun},
  title     = {Feature Spectrum Learning for Remote Sensing Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12647--12657},
}
BioX-CPath: Biologically-driven Explainable Diagnostics for Multistain IHC Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Gallagher-Syed_2025_CVPR,
  author    = {Gallagher-Syed, Amaya and Senior, Henry and Alwazzan, Omnia and Pontarini, Elena and Bombardieri, Michele and Pitzalis, Costantino and Lewis, Myles J. and Barnes, Michael R. and Rossi, Luca and Slabaugh, Gregory},
  title     = {{BioX-CPath}: Biologically-driven Explainable Diagnostics for Multistain {IHC} Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10372--10383},
}
DriveDreamer4D: World Models Are Effective Data Machines for 4D Driving Scene Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guosheng and Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Zhang, Xueyang and Wang, Yida and Huang, Guan and Chen, Xinze and Wang, Boyuan and Zhang, Youyi and Mei, Wenjun and Wang, Xingang},
  title     = {{DriveDreamer4D}: World Models Are Effective Data Machines for {4D} Driving Scene Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12015--12026},
}
LoKi: Low-dimensional KAN for Efficient Fine-tuning Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Xuan and Pan, Renjie and Yang, Hua},
  title     = {{LoKi}: Low-dimensional {KAN} for Efficient Fine-tuning Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14869--14880},
}
Dr. Splat: Directly Referring 3D Gaussian Splatting via Direct Language Embedding Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jun-Seong_2025_CVPR,
  author    = {Jun-Seong, Kim and Kim, GeonU and Yu-Ji, Kim and Wang, Yu-Chiang Frank and Choe, Jaesung and Oh, Tae-Hyun},
  title     = {{Dr. Splat}: Directly Referring {3D} {Gaussian} Splatting via Direct Language Embedding Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14137--14146},
}
Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Feng and Jiang, Xiaoheng and Lu, Yang and Cao, Jiale and Chen, Dong and Xu, Mingliang},
  title     = {Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23860--23869},
}
GauCho: Gaussian Distributions with Cholesky Decomposition for Oriented Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marques_2025_CVPR,
  author    = {Marques, Jos{\'e} Henrique Lima and Murrugarra-Llerena, Jeffri and Jung, Claudio R.},
  title     = {{GauCho}: {Gaussian} Distributions with {Cholesky} Decomposition for Oriented Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3593--3602},
}
Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Yuanhao and Yin, Zhaozheng},
  title     = {Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29623--29633},
}
No Thing, Nothing: Highlighting Safety-Critical Classes for Robust LiDAR Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR,
  author    = {Park, Junsung and Lee, Hwijeong and Kang, Inha and Shim, Hyunjung},
  title     = {No Thing, Nothing: Highlighting Safety-Critical Classes for Robust {LiDAR} Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6690--6699},
}
Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeonghwan and McLaughlin, Niall and Alouani, Ihsen},
  title     = {Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10235--10243},
}
Consistent Normal Orientation for 3D Point Clouds via Least Squares on Delaunay Graph-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Rao and Zheng, Jianmin and Yu, Liang},
  title     = {Consistent Normal Orientation for {3D} Point Clouds via Least Squares on {Delaunay} Graph},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16932--16942},
}
GaussianWorld: Gaussian World Model for Streaming 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2025_CVPR,
  author    = {Zuo, Sicheng and Zheng, Wenzhao and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{GaussianWorld}: {Gaussian} World Model for Streaming {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6772--6781},
}
ICP: Immediate Compensation Pruning for Mid-to-high Sparsity-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Xin and Fu, Xueming and Jiang, Zihang and Zhou, S. Kevin},
  title     = {{ICP}: Immediate Compensation Pruning for Mid-to-high Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9487--9496},
}
VinaBench: Benchmark for Faithful and Consistent Visual Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Silin and Mathew, Sheryl and Mi, Li and Mamooler, Sepideh and Zhao, Mengjie and Wakaki, Hiromi and Mitsufuji, Yuki and Montariol, Syrielle and Bosselut, Antoine},
  title     = {{VinaBench}: Benchmark for Faithful and Consistent Visual Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2870--2879},
}
ATA: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yizhe and Sun, Zhimin and Du, Yuzhen and Yi, Ran and Lu, Guangben and Hu, Teng and Li, Luying and Ma, Lizhuang and Zou, Fangyuan},
  title     = {{ATA}: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18335--18345},
}
Optimizing for the Shortest Path in Denoising Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ping and Zhang, Xingpeng and Liu, Zhaoxiang and Hu, Huan and Liu, Xiang and Wang, Kai and Wang, Min and Qian, Yanlin and Lian, Shiguo},
  title     = {Optimizing for the Shortest Path in Denoising Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18021--18030},
}
Antidote: A Unified Framework for Mitigating LVLM Hallucinations in Counterfactual Presupposition and Object Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yuanchen and Zhang, Lu and Yao, Hang and Du, Junlong and Yan, Ke and Ding, Shouhong and Wu, Yunsheng and Li, Xiaoqiang},
  title     = {Antidote: A Unified Framework for Mitigating {LVLM} Hallucinations in Counterfactual Presupposition and Object Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14646--14656},
}
Language-Guided Audio-Visual Learning for Long-Term Sports Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Huangbiao and Ke, Xiao and Wu, Huanqi and Xu, Rui and Li, Yuezhou and Guo, Wenzhong},
  title     = {Language-Guided Audio-Visual Learning for Long-Term Sports Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23967--23977},
}
Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jae Hyeon and Jeon, Joo Hyeon and Lee, Jae Yun and Ahn, Sangyeon and Cha, Min Hee and Kim, Min Geol and Nam, Hyeok and Cho, Sung In}, title = {Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20602-20611} }
VODiff: Controlling Object Visibility Order in Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Dong and Jia, Jinyuan and Liu, Yuhao and Ke, Zhanghan and Fu, Hongbo and Lau, Rynson W. H.}, title = {VODiff: Controlling Object Visibility Order in Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18379-18389} }
Dual Diffusion for Unified Image Generation and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zijie and Li, Henry and Shi, Yichun and Farimani, Amir Barati and Kluger, Yuval and Yang, Linjie and Wang, Peng}, title = {Dual Diffusion for Unified Image Generation and Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2779-2790} }
WeakMCN: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Silin and Liu, Yang and He, Xinwei and Ourselin, Sebastien and Tan, Lei and Luo, Gen}, title = {WeakMCN: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9175-9185} }
CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiahao and Ma, Weijian and Li, Xueyang and Lou, Yunzhong and Zhou, Guichun and Zhou, Xiangdong}, title = {CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18563-18573} }
Leveraging SD Map to Augment HD Map-based Trajectory Prediction-
[pdf]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Zhiwei and Ding, Ran and Li, Wei and Zhang, Peng and Tang, Guobin and Guo, Jia}, title = {Leveraging SD Map to Augment HD Map-based Trajectory Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17219-17228} }
4DGC: Rate-Aware 4D Gaussian Compression for Efficient Streamable Free-Viewpoint Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Qiang and Zheng, Zihan and Zhong, Houqiang and Fu, Sihua and Song, Li and Zhang, Xiaoyun and Zhai, Guangtao and Wang, Yanfeng}, title = {4DGC: Rate-Aware 4D Gaussian Compression for Efficient Streamable Free-Viewpoint Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {875-885} }
ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Tao and Dong, Qiulei}, title = {ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16829-16838} }
Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhen_2025_CVPR, author = {Zhen, Dingcheng and Yin, Shunshun and Qin, Shiyang and Yi, Hou and Zhang, Ziwei and Liu, Siyuan and Qi, Gan and Tao, Ming}, title = {Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21075-21085} }
GASP: Gaussian Avatars with Synthetic Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saunders_2025_CVPR, author = {Saunders, Jack and Hewitt, Charlie and Jian, Yanan and Kowalski, Marek and Baltrusaitis, Tadas and Chen, Yiye and Cosker, Darren and Estellers, Virginia and Gyd{\'e}, Nicholas and Namboodiri, Vinay P. and Lundell, Benjamin E.}, title = {GASP: Gaussian Avatars with Synthetic Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {271-280} }
Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jie and Qin, Tiexin and Liu, Hui and Shi, Yilei and Mou, Lichao and Zhu, Xiao Xiang and Wang, Shiqi and Li, Haoliang}, title = {Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15560-15569} }
Composing Parts for Expressive Object Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Rangwani_2025_CVPR, author = {Rangwani, Harsh and Agarwal, Aishwarya and Kulkarni, Kuldeep and Babu, R. Venkatesh and Karanam, Srikrishna}, title = {Composing Parts for Expressive Object Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13209-13219} }
CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dongkun and Liang, Jiaming and Guo, Ke and Lu, Sha and Wang, Qi and Xiong, Rong and Miao, Zhenwei and Wang, Yue}, title = {CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17239-17248} }
Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Yiming and Xu, Zhu and Liu, Yang}, title = {Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18521-18530} }
PersonaHOI: Effortlessly Improving Face Personalization in Human-Object Interaction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Xinting and Wang, Haoran and Lenssen, Jan Eric and Schiele, Bernt}, title = {PersonaHOI: Effortlessly Improving Face Personalization in Human-Object Interaction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23775-23784} }
Video-Panda: Parameter-efficient Alignment for Encoder-free Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_CVPR, author = {Yi, Jinhui and Wasim, Syed Talal and Luo, Yanan and Naseer, Muzammal and Gall, Juergen}, title = {Video-Panda: Parameter-efficient Alignment for Encoder-free Video-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24119-24128} }
COSMIC: Clique-Oriented Semantic Multi-space Integration for Robust CLIP Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Fanding and Jiang, Jingyan and Jiang, Qinting and Li, Hebei and Khan, Faisal Nadeem and Wang, Zhi}, title = {COSMIC: Clique-Oriented Semantic Multi-space Integration for Robust CLIP Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9772-9781} }
MonoSplat: Generalizable 3D Gaussian Splatting from Monocular Depth Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yifan and Fan, Keyu and Yu, Weihao and Li, Chenxin and Lu, Hao and Yuan, Yixuan}, title = {MonoSplat: Generalizable 3D Gaussian Splatting from Monocular Depth Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21570-21579} }
Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ting and Li, Siyuan}, title = {Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29634-29643} }
SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xuesong and Huang, Linjiang and Ma, Tao and Fang, Rongyao and Shi, Shaoshuai and Li, Hongsheng}, title = {SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12068-12077} }
Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Ji and Hao, Fangwei and Yu, Mingyang and Kong, Desheng and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping}, title = {Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19271-19282} }
Probability Density Geodesics in Image Diffusion Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Qingtao and Singh, Jaskirat and Yang, Zhaoyuan and Tu, Peter Henry and Zhang, Jing and Li, Hongdong and Hartley, Richard and Campbell, Dylan}, title = {Probability Density Geodesics in Image Diffusion Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27989-27998} }
High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and Euclidean Distance Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuanqi and Huang, Jingcheng and Wang, Hongshen and Lv, Peiyuan and Liu, Yansong and Zheng, Jiuming and Guo, Jie and Guo, Yanwen}, title = {High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and Euclidean Distance Encoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1287-1296} }
DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Wei and Guo, Xi and Tang, Weixuan and Huang, Tingxuan and Wang, Chiyu and Ding, Chenjing}, title = {DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17187-17196} }
Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tybl_2025_CVPR, author = {Tybl, Ondrej and Neumann, Lukas}, title = {Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14881-14890} }
Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Guanyao and Liu, Haoyu and Fu, Hongming and Peng, Yichuan and Liu, Jinyuan and Fan, Xin and Liu, Risheng}, title = {Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17882-17891} }
EgoLife: Towards Egocentric Life Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jingkang and Liu, Shuai and Guo, Hongming and Dong, Yuhao and Zhang, Xiamengwei and Zhang, Sicheng and Wang, Pengyun and Zhou, Zitang and Xie, Binzhu and Wang, Ziyue and Ouyang, Bei and Lin, Zhengyu and Cominelli, Marco and Cai, Zhongang and Li, Bo and Zhang, Yuanhan and Zhang, Peiyuan and Hong, Fangzhou and Widmer, Joerg and Gringoli, Francesco and Yang, Lei and Liu, Ziwei}, title = {EgoLife: Towards Egocentric Life Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28885-28900} }
RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Fan and Tian, Zhuo and Fan, Xuefeng and Zhou, Xiaoyi}, title = {RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20665-20674} }
BrepGiff: Lightweight Generation of Complex B-rep with 3D GAT Diffusion-
[pdf]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Hao and Huang, Xiaoshui and Jiacheng, Hao and Bai, Yunpeng and Gan, Hongping and Shi, Yilei}, title = {BrepGiff: Lightweight Generation of Complex B-rep with 3D GAT Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26587-26596} }
Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Lintong and Yin, Kang and Lee, Seong-Whan}, title = {Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30053-30062} }
Prior-free 3D Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Xiuqiang and Jin, Li and Zhang, Zhengxian and Li, Jiachen and Zhong, Fan and Zhang, Guofeng and Qin, Xueying}, title = {Prior-free 3D Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1200-1209} }
LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Muchen and Christen, Sammy and Wan, Chengde and Cai, Yujun and Liao, Renjie and Sigal, Leonid and Ma, Shugao}, title = {LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17416-17425} }
Progressive Correspondence Regenerator for Robust 3D Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Guiyu and Ao, Sheng and Zhang, Ye and Xu, Kai and Guo, Yulan}, title = {Progressive Correspondence Regenerator for Robust 3D Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1210-1219} }
Cross-Modal 3D Representation with Multi-View Images and Point Clouds-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Ziyang and Wang, Pinghui and Liang, Zi and Bai, Haitao and Zhang, Ruofei}, title = {Cross-Modal 3D Representation with Multi-View Images and Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3728-3739} }
Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Ventura_2025_CVPR, author = {Ventura, Lucas and Yang, Antoine and Schmid, Cordelia and Varol, G{\"u}l}, title = {Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18947-18958} }
Decompositional Neural Scene Reconstruction with Generative Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Junfeng and Liu, Yu and Lu, Ruijie and Zhou, Zirui and Zhu, Song-Chun and Chen, Yixin and Huang, Siyuan}, title = {Decompositional Neural Scene Reconstruction with Generative Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6022-6033} }
Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Fuyun and Zhang, Tong and Wang, Yuanzhi and Qiu, Yide and Liu, Xin and Guo, Xu and Cui, Zhen}, title = {Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20416-20426} }
Learning Visual Generative Priors without Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Shuailei and Zheng, Kecheng and Wei, Ying and Wu, Wei and Lu, Fan and Zhang, Yifei and Xie, Chen-Wei and Gong, Biao and Zhu, Jiapeng and Shen, Yujun}, title = {Learning Visual Generative Priors without Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8051-8061} }
Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chaoyang and Qin, Jianyang and Cui, Jinhao and Liu, Zeyu and Hu, Ning and Liao, Qing}, title = {Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25124-25134} }
Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Ji and Guan, Banglei and Liu, Zibin and Kneip, Laurent}, title = {Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11515-11524} }
3DGUT: Enabling Distorted Cameras and Secondary Rays in Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Qi and Esturo, Janick Martinez and Mirzaei, Ashkan and Mo{\"e}nne-Loccoz, Nicolas and Gojcic, Zan}, title = {3DGUT: Enabling Distorted Cameras and Secondary Rays in Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26036-26046} }
Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Zhiyuan and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao}, title = {Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14483-14494} }
Lifting the Veil on Visual Information Flow in MLLMs: Unlocking Pathways to Faster Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hao and Si, Guangzong and Wang, Zilei}, title = {Lifting the Veil on Visual Information Flow in MLLMs: Unlocking Pathways to Faster Inference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9382-9391} }
Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jialai and Wu, Yuxiao and Xu, Weiye and Huang, Yating and Zhang, Chao and Li, Zongpeng and Xu, Mingwei and Liang, Zhenkai}, title = {Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20103-20112} }
Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zining and Guan, Tongkun and Fu, Pei and Duan, Chen and Jiang, Qianyi and Guo, Zhentao and Guo, Shan and Luo, Junfeng and Shen, Wei and Yang, Xiaokang}, title = {Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14460-14471} }
Mamba-Reg: Vision Mamba Also Needs Registers-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Feng and Wang, Jiahao and Ren, Sucheng and Wei, Guoyizhe and Mei, Jieru and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang}, title = {Mamba-Reg: Vision Mamba Also Needs Registers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14944-14953} }
It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data-
[pdf]
[supp]
[bibtex]@InProceedings{Schnaus_2025_CVPR, author = {Schnaus, Dominik and Araslanov, Nikita and Cremers, Daniel}, title = {It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24983-24992} }
Open Set Label Shift with Test Time Out-of-Distribution Reference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Changkun and Tsuchida, Russell and Petersson, Lars and Barnes, Nick}, title = {Open Set Label Shift with Test Time Out-of-Distribution Reference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30619-30629} }
Visual Persona: Foundation Model for Full-Body Human Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Jisu and Son, Soowon and Xu, Zhan and Shi, Jing and Liu, Difan and Liu, Feng and Kim, Seungryong and Zhou, Yang}, title = {Visual Persona: Foundation Model for Full-Body Human Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18630-18641} }
SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiahui and Zhan, Fangneng and Shao, Ling and Lu, Shijian}, title = {SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11167-11176} }
GaussianFormer-2: Probabilistic Gaussian Superposition for Efficient 3D Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yuanhui and Thammatadatrakoon, Amonnut and Zheng, Wenzhao and Zhang, Yunpeng and Du, Dalong and Lu, Jiwen}, title = {GaussianFormer-2: Probabilistic Gaussian Superposition for Efficient 3D Occupancy Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27477-27486} }
MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jianwei and Li, Xin and Yang, Fan and Zhai, Qiang and Luo, Ao and Zhao, Yang and Cheng, Hong and Fu, Huazhu}, title = {MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20789-20799} }
Flexible Frame Selection for Efficient Video Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Buch_2025_CVPR, author = {Buch, Shyamal and Nagrani, Arsha and Arnab, Anurag and Schmid, Cordelia}, title = {Flexible Frame Selection for Efficient Video Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29071-29082} }
EventGPT: Event Stream Understanding with Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Shaoyu and Li, Jianing and Zhao, Guanghui and Zhang, Yunjian and Meng, Xin and Yu, Fei Richard and Ji, Xiangyang and Li, Ming}, title = {EventGPT: Event Stream Understanding with Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29139-29149} }
Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-LoRA Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Lingchen and Wu, Rongyuan and Ma, Zhiyuan and Liu, Shuaizheng and Yi, Qiaosi and Zhang, Lei}, title = {Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-LoRA Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2333-2343} }
Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Weiwei and Liu, Junzhuo and Ren, Yuanyuan and Zheng, Yuchen and Liu, Yahao and Li, Wen}, title = {Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15486-15496} }
Mr. DETR: Instructive Multi-Route Training for Detection Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chang-Bin and Zhong, Yujie and Han, Kai}, title = {Mr. DETR: Instructive Multi-Route Training for Detection Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9933-9943} }
MITracker: Multi-View Integration for Visual Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Mengjie and Zhu, Yitao and Jiang, Haotian and Li, Jiaming and Shen, Zhenrong and Wang, Sheng and Huang, Haolin and Wang, Xinyu and Zhang, Han and Yang, Qing and Wang, Qian}, title = {MITracker: Multi-View Integration for Visual Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27176-27185} }
Hearing Hands: Generating Sounds from Physical Interactions in 3D Scenes-
[pdf]
[bibtex]@InProceedings{Dou_2025_CVPR, author = {Dou, Yiming and Oh, Wonseok and Luo, Yuqing and Loquercio, Antonio and Owens, Andrew}, title = {Hearing Hands: Generating Sounds from Physical Interactions in 3D Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1795-1804} }
AirRoom: Objects Matter in Room Reidentification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Runmao and Du, Yi and Chen, Zhuoqun and Zheng, Haoze and Wang, Chen}, title = {AirRoom: Objects Matter in Room Reidentification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1385-1394} }
Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berasi_2025_CVPR, author = {Berasi, Davide and Farina, Matteo and Mancini, Massimiliano and Ricci, Elisa and Strisciuglio, Nicola}, title = {Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24917-24927} }
DefMamba: Deformable Visual State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Leiye and Zhang, Miao and Yin, Jihao and Liu, Tingwei and Ji, Wei and Piao, Yongri and Lu, Huchuan}, title = {DefMamba: Deformable Visual State Space Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8838-8847} }
HOP: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Hongye and Wang, Tianyu and Shi, Guangsi and Zhao, Zexing and Fu, Yanwei}, title = {HOP: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {906-916} }
VoxelSplat: Dynamic Gaussian Splatting as an Effective Loss for Occupancy and Flow Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Ziyue and Wang, Shenlong and Xie, Jin and Liu, Jiang-jiang and Wang, Jingdong and Yang, Jian}, title = {VoxelSplat: Dynamic Gaussian Splatting as an Effective Loss for Occupancy and Flow Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6761-6771} }
Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Chuandong and Weng, Xingxing and Jiang, Shuguo and Li, Pengcheng and Yu, Lei and Xia, Gui-Song}, title = {Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27380-27389} }
ControlFace: Harnessing Facial Parametric Control for Face Rigging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Wooseok and Hong, Youngjun and Cha, Geonho and Kim, Seungryong}, title = {ControlFace: Harnessing Facial Parametric Control for Face Rigging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5614-5624} }
Minority-Focused Text-to-Image Generation via Prompt Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Um_2025_CVPR, author = {Um, Soobin and Ye, Jong Chul}, title = {Minority-Focused Text-to-Image Generation via Prompt Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20926--20936} }
Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Yuzhuo and Jin, Jiaqi and Dong, Zhibin and Wang, Siwei and Liu, Xinwang and Zhu, En and Yang, Xihong and Gan, Xinbiao and Feng, Yu}, title = {Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5071--5081} }
Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Tianran and Chen, Jiarui and Zhang, Baoquan and Yu, Zhehao and Chen, Shidong and Ye, Rui and Li, Xutao and Ye, Yunming}, title = {Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9655--9664} }
MANTA: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Lei and Fan, Dongdong and Hu, Zhiguang and Ding, Yiwen and Di, Donglin and Yi, Kai and Pagnucco, Maurice and Song, Yang}, title = {{MANTA}: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25518--25527} }
MoDec-GS: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2025_CVPR, author = {Kwak, Sangwoon and Kim, Joonsoo and Jeong, Jun Young and Cheong, Won-Sik and Oh, Jihyong and Kim, Munchurl}, title = {{MoDec-GS}: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11338--11348} }
DaCapo: Score Distillation as Stacked Bridge for Fast and High-quality 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yufei and Liao, Bangyan and Hu, Yuqi and Lin, Haitao and Wu, Lirong and Li, Siyuan and Tan, Cheng and Liu, Zicheng and Liu, Yunfan and Zang, Zelin and Yu, Chang and Lei, Zhen}, title = {{DaCapo}: Score Distillation as Stacked Bridge for Fast and High-quality {3D} Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16304--16313} }
A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuanye and Liu, Jinyang and Dian, Renwei and Li, Shutao}, title = {A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7437--7446} }
SCSegamba: Lightweight Structure-Aware Vision Mamba for Crack Segmentation in Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hui and Jia, Chen and Shi, Fan and Cheng, Xu and Chen, Shengyong}, title = {{SCSegamba}: Lightweight Structure-Aware Vision {Mamba} for Crack Segmentation in Structures}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29406--29416} }
Autoregressive Sequential Pretraining for Visual Tracking-
[pdf]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Shiyi and Bai, Yifan and Gong, Yihong and Wei, Xing}, title = {Autoregressive Sequential Pretraining for Visual Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7254--7264} }
Number it: Temporal Grounding Videos like Flipping Manga-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yongliang and Hu, Xinting and Sun, Yuyang and Zhou, Yizhou and Zhu, Wenbo and Rao, Fengyun and Schiele, Bernt and Yang, Xu}, title = {Number it: Temporal Grounding Videos like Flipping Manga}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13754--13765} }
Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Zu_2025_CVPR, author = {Zu, Lizheng and Lin, Lin and Fu, Song and Zhao, Na and Zhou, Pan}, title = {Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29513--29522} }
PromptHMR: Promptable Human Mesh Recovery-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yufu and Sun, Yu and Patel, Priyanka and Daniilidis, Kostas and Black, Michael J. and Kocabas, Muhammed}, title = {{PromptHMR}: Promptable Human Mesh Recovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1148--1159} }
SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pallotta_2025_CVPR, author = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Li, Shuai and Zatsarynna, Olga and Gall, Juergen}, title = {{SyncVP}: Joint Diffusion for Synchronous Multi-Modal Video Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13787--13797} }
HUSH: Holistic Panoramic 3D Scene Understanding using Spherical Harmonics-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jongsung and Park, Harin and Lee, Byeong-Uk and Joo, Kyungdon}, title = {{HUSH}: Holistic Panoramic {3D} Scene Understanding using Spherical Harmonics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16599--16608} }
SkillMimic: Learning Basketball Interaction Skills from Demonstrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinhuai and Zhao, Qihan and Yu, Runyi and Tsui, Hok Wai and Zeng, Ailing and Lin, Jing and Luo, Zhengyi and Yu, Jiwen and Li, Xiu and Chen, Qifeng and Zhang, Jian and Zhang, Lei and Tan, Ping}, title = {{SkillMimic}: Learning Basketball Interaction Skills from Demonstrations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17540--17549} }
VISTREAM: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Kang and Wei, Ziling and Yan, Jing and Zhang, Boning and Guo, Qinghai and Zhang, Yaoyu and He, Zhezhi}, title = {{VISTREAM}: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8796--8805} }
STPro: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2025_CVPR, author = {Garg, Aaryan and Kumar, Akash and Rawat, Yogesh S}, title = {{STPro}: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3384--3394} }
RGBAvatar: Reduced Gaussian Blendshapes for Online Modeling of Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Linzhou and Li, Yumeng and Weng, Yanlin and Zheng, Youyi and Zhou, Kun}, title = {{RGBAvatar}: Reduced {Gaussian} Blendshapes for Online Modeling of Head Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10747--10757} }
Rashomon Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Donnelly_2025_CVPR, author = {Donnelly, Jon and Guo, Zhicheng and Barnett, Alina Jade and McTavish, Hayden and Chen, Chaofan and Rudin, Cynthia}, title = {Rashomon Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4528--4538} }
EEE-Bench: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ming and Zhong, Jike and Chen, Tianle and Lai, Yuxiang and Psounis, Konstantinos}, title = {{EEE-Bench}: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13337--13349} }
Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Ge, Mingyuan and Gao, Wei and Wang, Lei and Liu, Li}, title = {Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28726--28735} }
EnvPoser: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Songpengcheng and Zhang, Yu and Su, Zhuo and Zheng, Xiaozheng and Lv, Zheng and Wang, Guidong and Zhang, Yongjie and Wu, Qi and Chu, Lei and Pei, Ling}, title = {{EnvPoser}: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1839--1849} }
A Unified Framework for Heterogeneous Semi-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heidari_2025_CVPR, author = {Heidari, Marzi and Alchihabi, Abdullah and Yan, Hao and Guo, Yuhong}, title = {A Unified Framework for Heterogeneous Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15371--15380} }
Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chao and Fan, Hehe and Yang, Huichen and Karimi, Sarvnaz and Yao, Lina and Yang, Yi}, title = {Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23539--23550} }
ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR, author = {Zha, Quanxing and Liu, Xin and Peng, Shu-Juan and Cheung, Yiu-ming and Xu, Xing and Wang, Nannan}, title = {{ReCon}: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29680--29689} }
Free360: Layered Gaussian Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_CVPR, author = {Bao, Chong and Zhang, Xiyu and Yu, Zehao and Shi, Jiale and Zhang, Guofeng and Peng, Songyou and Cui, Zhaopeng}, title = {Free360: Layered {Gaussian} Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16377--16387} }
Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Novello_2025_CVPR, author = {Novello, Tiago and Aldana, Diana and Araujo, Andre and Velho, Luiz}, title = {Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3071--3080} }
Preconditioners for the Stochastic Training of Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chng_2025_CVPR, author = {Chng, Shin-Fang and Saratchandran, Hemanth and Lucey, Simon}, title = {Preconditioners for the Stochastic Training of Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27222--27232} }
Open Ad-hoc Categorization with Contextualized Feature Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zilin and Mo, Sangwoo and Yu, Stella X. and Behpour, Sima and Ren, Liu}, title = {Open Ad-hoc Categorization with Contextualized Feature Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15108--15117} }
Real-time Free-view Human Rendering from Sparse-view RGB Videos using Double Unprojected Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Guoxing and Dabral, Rishabh and Zhu, Heming and Fua, Pascal and Theobalt, Christian and Habermann, Marc}, title = {Real-time Free-view Human Rendering from Sparse-view {RGB} Videos using Double Unprojected Textures}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {562--573} }
ECBench: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR, author = {Dang, Ronghao and Yuan, Yuqian and Zhang, Wenqi and Xin, Yifei and Zhang, Boqiang and Li, Long and Wang, Liuyi and Zeng, Qinyang and Li, Xin and Bing, Lidong}, title = {{ECBench}: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24593--24602} }
SfM-Free 3D Gaussian Splatting via Hierarchical Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Bo and Yao, Angela}, title = {{SfM}-Free {3D} {Gaussian} Splatting via Hierarchical Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21654--21663} }
Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lavoie_2025_CVPR, author = {Lavoie, Marc-Antoine and Mahmoud, Anas and Waslander, Steven L.}, title = {Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4692--4702} }
Dynamic Updates for Language Adaptation in Visual-Language Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaohai and Zhong, Bineng and Liang, Qihua and Mo, Zhiyi and Nong, Jian and Song, Shuxiang}, title = {Dynamic Updates for Language Adaptation in Visual-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19165--19174} }
Multi-focal Conditioned Latent Diffusion for Person Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiaqi and Zhang, Jichao and Rota, Paolo and Sebe, Nicu}, title = {Multi-focal Conditioned Latent Diffusion for Person Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16019--16028} }
Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jiangyi and Zhao, Na}, title = {Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor {3D} Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20329--20339} }
CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Weitao and Zhou, Hang and Liao, Jing and Cheng, Li and Zhou, Wenbo}, title = {{CASAGPT}: Cuboid Arrangement and Scene Assembly for Interior Design}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29173--29182} }
Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Zhiqi and Wang, Junjie and Zhao, Lingling and Wang, Chunyu}, title = {Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19251--19260} }
Evaluating Model Perception of Color Illusions in Photorealistic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Lingjun and Tang, Zineng and Suhr, Alane}, title = {Evaluating Model Perception of Color Illusions in Photorealistic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7805--7814} }
MINIMA: Modality Invariant Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Jiangwei and Jiang, Xingyu and Li, Zizhuo and Liang, Dingkang and Zhou, Xin and Bai, Xiang}, title = {{MINIMA}: Modality Invariant Image Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23059--23068} }
OccMamba: Semantic Occupancy Prediction with State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Heng and Hou, Yuenan and Xing, Xiaohan and Ma, Yuexin and Sun, Xiao and Zhang, Yanyong}, title = {{OccMamba}: Semantic Occupancy Prediction with State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11949--11959} }
3D Convex Splatting: Radiance Field Rendering with 3D Smooth Convexes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Held_2025_CVPR, author = {Held, Jan and Vandeghen, Renaud and Hamdi, Abdullah and Deliege, Adrien and Cioppa, Anthony and Giancola, Silvio and Vedaldi, Andrea and Ghanem, Bernard and Van Droogenbroeck, Marc}, title = {{3D} Convex Splatting: Radiance Field Rendering with {3D} Smooth Convexes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21360--21369} }
3D Prior Is All You Need: Cross-Task Few-shot 2D Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Yihua and Wang, Hengfei and Zhang, Zhongqun and Yue, Yang and Kim, Boeun and Lu, Feng and Chang, Hyung Jin}, title = {{3D} Prior Is All You Need: Cross-Task Few-shot {2D} Gaze Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23891--23900} }
Cheb-GR: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jinxi and Li, He and Du, Bo and Ye, Mang}, title = {{Cheb-GR}: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19261--19270} }
Spotting the Unexpected (STU): A 3D LiDAR Dataset for Anomaly Segmentation in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nekrasov_2025_CVPR, author = {Nekrasov, Alexey and Burdorf, Malcolm and Worrall, Stewart and Leibe, Bastian and Perez, Julie Stephany Berrio}, title = {Spotting the Unexpected ({STU}): A {3D} {LiDAR} Dataset for Anomaly Segmentation in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11875--11885} }
Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Ji Hyeok and Kim, Eun Tae and Kim, Seoyeon and Lee, Joo Ho and Kim, Bumsoo and Chang, Buru}, title = {Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14257--14267} }
GCC: Generative Color Constancy via Diffusing a Color Checker-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Chen-Wei and Fan, Cheng-De and Chang, Chia-Che and Lo, Yi-Chen and Tseng, Yu-Chee and Huang, Jiun-Long and Liu, Yu-Lun}, title = {{GCC}: Generative Color Constancy via Diffusing a Color Checker}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10868--10878} }
Do Visual Imaginations Improve Vision-and-Language Navigation Agents?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Perincherry_2025_CVPR, author = {Perincherry, Akhil and Krantz, Jacob and Lee, Stefan}, title = {Do Visual Imaginations Improve Vision-and-Language Navigation Agents?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3846--3855} }
On Denoising Walking Videos for Gait Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Dongyang and Fan, Chao and Ma, Jingzhe and Zhou, Jingkai and Chen, Weihua and Yu, Shiqi}, title = {On Denoising Walking Videos for Gait Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12347--12357} }
Conformal Prediction for Zero-Shot Models-
[pdf]
[supp]
[bibtex]@InProceedings{Silva-Rodriguez_2025_CVPR, author = {Silva-Rodr{\'\i}guez, Julio and Ben Ayed, Ismail and Dolz, Jose}, title = {Conformal Prediction for Zero-Shot Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19931--19941} }
PhysAnimator: Physics-Guided Generative Cartoon Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Tianyi and Zhao, Yiwei and Jiang, Ying and Jiang, Chenfanfu}, title = {{PhysAnimator}: Physics-Guided Generative Cartoon Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10793--10804} }
SeriesBench: A Benchmark for Narrative-Driven Drama Series Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenkai and Lei, Yiming and Liu, Zeming and Leng, Haitao and Liu, ShaoGuo and Gao, Tingting and Liu, Qingjie and Wang, Yunhong}, title = {{SeriesBench}: A Benchmark for Narrative-Driven Drama Series Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28995--29004} }
Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Quan and Fang, Jinwei and Yuan, Rui and Tang, Xi and Qi, Yuxin and Zhang, Ke and Yuan, Chun}, title = {Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24139--24148} }
FIMA-Q: Post-Training Quantization for Vision Transformers by Fisher Information Matrix Approximation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Zhuguanyu and Wang, Shihe and Zhang, Jiayi and Chen, Jiaxin and Wang, Yunhong}, title = {{FIMA-Q}: Post-Training Quantization for Vision Transformers by {Fisher} Information Matrix Approximation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14891--14900} }
HotSpot: Signed Distance Function Optimization with an Asymptotically Sufficient Condition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zimo and Wang, Cheng and Yoshino, Taiki and Tao, Sirui and Fu, Ziyang and Li, Tzu-Mao}, title = {{HotSpot}: Signed Distance Function Optimization with an Asymptotically Sufficient Condition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1276--1286} }
GliaNet: Adaptive Neural Network Structure Learning with Glia-Driven-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Mengqiao and Pan, Liyuan and Liu, Xiabi}, title = {{GliaNet}: Adaptive Neural Network Structure Learning with Glia-Driven}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25240--25249} }
BACON: Improving Clarity of Image Captions via Bag-of-Concept Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhantao and Feng, Ruili and Yan, Keyu and Wang, Huangji and Wang, Zhicai and Zhu, Shangwen and Zhang, Han and Xiao, Jie and Wu, Pingyu and Zhu, Kai and Chen, Jixuan and Xie, Chen-Wei and Yang, Yue and Zhang, Hongyang and Liu, Yu and Cheng, Fan}, title = {{BACON}: Improving Clarity of Image Captions via Bag-of-Concept Graphs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14380--14389} }
EntitySAM: Segment Everything in Video-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Mingqiao and Oh, Seoung Wug and Ke, Lei and Lee, Joon-Young}, title = {{EntitySAM}: Segment Everything in Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24234--24243} }
GS-2DGS: Geometrically Supervised 2DGS for Reflective Object Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Tong_2025_CVPR, author = {Tong, Jinguang and Li, Xuesong and Maken, Fahira Afzal and Muthu, Sundaram and Petersson, Lars and Nguyen, Chuong and Li, Hongdong}, title = {{GS-2DGS}: Geometrically Supervised {2DGS} for Reflective Object Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21547--21557} }
Libra-Merging: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Longrong and Shen, Dong and Cai, Chaoxiang and Chen, Kaibing and Yang, Fan and Gao, Tingting and Zhang, Di and Li, Xi}, title = {Libra-Merging: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9402--9412} }
VasTSD: Learning 3D Vascular Tree-state Space Diffusion Model for Angiography Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhifeng and Yi, Renjiao and Wen, Xin and Zhu, Chenyang and Xu, Kai}, title = {{VasTSD}: Learning {3D} Vascular Tree-state Space Diffusion Model for Angiography Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15693--15702} }
PanSplat: 4K Panorama Synthesis with Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Cheng and Xu, Haofei and Wu, Qianyi and Gambardella, Camilo Cruz and Phung, Dinh and Cai, Jianfei}, title = {{PanSplat}: {4K} Panorama Synthesis with Feed-Forward {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11437--11447} }
WISNet: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shifan and Zhu, Hongzi and He, Yinan and Guo, Minyi and Lou, Ziyang and Chang, Shan}, title = {{WISNet}: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15076--15085} }
MixerMDM: Learnable Composition of Human Motion Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruiz-Ponce_2025_CVPR, author = {Ruiz-Ponce, Pablo and Barquero, German and Palmero, Cristina and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e}}, title = {{MixerMDM}: Learnable Composition of Human Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12380--12390} }
Hand-held Object Reconstruction from RGB Video with Dynamic Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Shijian and Ye, Qi and Xie, Rengan and Huo, Yuchi and Chen, Jiming}, title = {Hand-held Object Reconstruction from {RGB} Video with Dynamic Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12220--12230} }
LEDiff: Latent Exposure Diffusion for HDR Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chao and Xia, Zhihao and Leimkuhler, Thomas and Myszkowski, Karol and Zhang, Xuaner}, title = {{LEDiff}: Latent Exposure Diffusion for {HDR} Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {453--464} }
Video Depth Anything: Consistent Depth Estimation for Super-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Sili and Guo, Hengkai and Zhu, Shengnan and Zhang, Feihu and Huang, Zilong and Feng, Jiashi and Kang, Bingyi}, title = {Video Depth Anything: Consistent Depth Estimation for Super-Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22831--22840} }
VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Ziyang and Wu, Haoning and Li, Dongxu and Ma, Jing and Kankanhalli, Mohan and Li, Junnan}, title = {VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8461-8474} }
InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Tiehan and Nan, Kepan and Xie, Rui and Zhou, Penghao and Yang, Zhenheng and Fu, Chaoyou and Li, Xiang and Yang, Jian and Tai, Ying}, title = {InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28974-28983} }
AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guan_2025_CVPR, author = {Guan, Jiazhi and Wang, Kaisiyuan and Xu, Zhiliang and Yang, Quanwei and Sun, Yasheng and He, Shengyi and Liang, Borong and Cao, Yukang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wang, Jingdong and Zhao, Youjian and Zhou, Hang and Liu, Ziwei}, title = {AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10678-10689} }
Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Ziteng and Chu, Xuangeng and Harada, Tatsuya}, title = {Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26472-26482} }
EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yura_2025_CVPR, author = {Yura, Toshiya and Mirzaei, Ashkan and Gilitschenski, Igor}, title = {EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26876-26886} }
Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jihan and Yang, Shusheng and Gupta, Anjali W. and Han, Rilyn and Fei-Fei, Li and Xie, Saining}, title = {Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10632-10643} }
3D Student Splatting and Scooping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jialin and Yue, Jiangbei and He, Feixiang and Wang, He}, title = {3D Student Splatting and Scooping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21045-21054} }
World-consistent Video Diffusion with Explicit 3D Modeling-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qihang and Zhai, Shuangfei and Martin, Miguel {\'A}ngel Bautista and Miao, Kevin and Toshev, Alexander and Susskind, Joshua and Gu, Jiatao}, title = {World-consistent Video Diffusion with Explicit 3D Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21685-21695} }
A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Wangbo and Han, Yizeng and Tang, Jiasheng and Li, Zhikai and Song, Yibing and Wang, Kai and Wang, Zhangyang and You, Yang}, title = {A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19814-19824} }
Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guizilini_2025_CVPR, author = {Guizilini, Vitor and Irshad, Muhammad Zubair and Chen, Dian and Shakhnarovich, Greg and Ambrus, Rares}, title = {Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {764-776} }
Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2025_CVPR, author = {Bi, Jing and Guo, Junjia and Tang, Yunlong and Wen, Lianggong Bruce and Liu, Zhang and Wang, Bingjie and Xu, Chenliang}, title = {Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4135-4144} }
SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jaerin and Jung, Daniel Sungho and Lee, Kanggeon and Lee, Kyoung Mu}, title = {SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13021-13030} }
SemiDAViL: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Basak_2025_CVPR, author = {Basak, Hritam and Yin, Zhaozheng}, title = {SemiDAViL: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9816-9828} }
Learning Partonomic 3D Reconstruction from Image Collections-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2025_CVPR, author = {Ruan, Xiaoqian and Yu, Pei and Jia, Dian and Park, Hyeonjeong and Xiong, Peixi and Tang, Wei}, title = {Learning Partonomic 3D Reconstruction from Image Collections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26734-26744} }
Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gerogiannis_2025_CVPR, author = {Gerogiannis, Dimitrios and Papantoniou, Foivos Paraperas and Potamias, Rolandos Alexandros and Lattas, Alexandros and Zafeiriou, Stefanos}, title = {Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10770-10782} }
Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2025_CVPR, author = {Ryu, Hyeonggon and Kim, Seongyu and Chung, Joon Son and Senocak, Arda}, title = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13540-13549} }
Structure from Collision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaneko_2025_CVPR, author = {Kaneko, Takuhiro}, title = {Structure from Collision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16314-16324} }
ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tong and Wang, Mingkang and Wang, Zhongze and Wang, Hongkai and Xu, Qi and Cong, Fengyu and Xu, Hongming}, title = {ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25920-25929} }
EVOS: Efficient Implicit Neural Training via EVOlutionary Selector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Weixiang and Xie, Shuzhao and Ren, Chengwei and Xie, Siyi and Tang, Chen and Ge, Shijia and Wang, Mingzi and Wang, Zhi}, title = {EVOS: Efficient Implicit Neural Training via EVOlutionary Selector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30472-30482} }
Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Henghui and Li, Guangyao and Zhou, Chang and Zhang, Chunjie and Zhao, Alan and Hu, Di}, title = {Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18804-18814} }
Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Le and Zheng, Ziwei and Chen, Boxu and Zhao, Zhengyu and Lin, Chenhao and Shen, Chao}, title = {Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14635-14645} }
OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Bingzhi and Fu, Sisi and Fang, Xiaocheng and Cai, Jieyi and Zhang, Boya and Lu, Minhua and Liu, Yishu}, title = {OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15570-15579} }
MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zeqi and Akkaya, Ibrahim Batuhan and Waeijen, Luc and Bondarev, Egor and Pourtaherian, Arash and Moreira, Orlando}, title = {MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29309-29320} }
SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Hess_2025_CVPR, author = {Hess, Georg and Lindstr{\"o}m, Carl and Fatemi, Maryam and Petersson, Christoffer and Svensson, Lennart}, title = {SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11982-11992} }
Audio-Visual Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Ruohao and Ying, Xianghua and Chen, Yaru and Niu, Dantong and Li, Guangyao and Qu, Liao and Qi, Yanyu and Zhou, Jinxing and Xing, Bowei and Yue, Wenzhen and Shi, Ji and Wang, Qixun and Zhang, Peiliang and Liang, Buwen}, title = {Audio-Visual Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13550-13560} }
Probabilistic Prompt Distribution Learning for Animal Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Rao_2025_CVPR, author = {Rao, Jiyong and Zhao, Brian Nlong and Wang, Yu}, title = {Probabilistic Prompt Distribution Learning for Animal Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29438-29447} }
dFLMoE: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Luyuan and Luan, Tianyu and Cai, Wenyuan and Yan, Guochen and Chen, Zhaoyu and Xi, Nan and Fang, Yuejian and Shen, Qingni and Wu, Zhonghai and Yuan, Junsong}, title = {dFLMoE: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10203-10213} }
Reconstructing Humans with a Biomechanically Accurate Skeleton-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Yan and Zhou, Xiaowei and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios}, title = {Reconstructing Humans with a Biomechanically Accurate Skeleton}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5355-5365} }
AdaCM^2: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction-
[pdf]
[bibtex]@InProceedings{Man_2025_CVPR, author = {Man, Yuanbin and Huang, Ying and Zhang, Chengming and Li, Bingzhe and Niu, Wei and Yin, Miao}, title = {{AdaCM$^2$}: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8534-8544} }
Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Wenbin and Tian, Feng and Leng, Sicong and Nie, Jiahao and Lin, Haonan and Wang, Qianying and Chen, Ping and Zhang, Xiaoqin and Lu, Shijian}, title = {Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29915-29926} }
VGGT: Visual Geometry Grounded Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jianyuan and Chen, Minghao and Karaev, Nikita and Vedaldi, Andrea and Rupprecht, Christian and Novotny, David}, title = {VGGT: Visual Geometry Grounded Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5294-5306} }
Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Sangwon and Choi, June Suk and Jo, Jaehyeong and Lee, Kimin and Hwang, Sung Ju}, title = {Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8203-8212} }
UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Chen and Ma, Xinzhu and Su, Encheng and Song, Xiufeng and Liu, Xiaohong and Li, Wei-Hong and Bai, Lei and Ouyang, Wanli and Yue, Xiangyu}, title = {UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29213-29224} }
Visual Consensus Prompting for Co-Salient Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jie and Yu, Nana and Zhang, Zihao and Han, Yahong}, title = {Visual Consensus Prompting for Co-Salient Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9591-9600} }
Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Xiangjun and Li, Xiaoyu and Zhuang, Yiyu and Zhang, Qi and Hu, Wenbo and Zhang, Chaopeng and Yao, Yao and Shan, Ying and Quan, Long}, title = {Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21392-21402} }
UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinqiao and Xu, Hao and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12231-12241} }
RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Gadot_2025_CVPR, author = {Gadot, Uri and Shocher, Assaf and Mannor, Shie and Chechik, Gal and Hallak, Assaf}, title = {RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12533-12542} }
Quantization without Tears-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Minghao and Yu, Hao and Shao, Jie and Zhou, Junjie and Zhu, Ke and Wu, Jianxin}, title = {Quantization without Tears}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4462-4472} }
PHGC: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos-
[pdf]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Xun and Huang, Zhiyi and Xu, Xing and Song, Jingkuan and Shen, Fumin and Shen, Heng Tao}, title = {PHGC: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8615-8624} }
Recognition-Synergistic Scene Text Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Zhengyao and Lyu, Pengyuan and Wu, Jingjing and Zhang, Chengquan and Yu, Jun and Lu, Guangming and Pei, Wenjie}, title = {Recognition-Synergistic Scene Text Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13104-13113} }
Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi}, title = {Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10067-10076} }
WildAvatar: Learning In-the-wild 3D Avatars from the Web-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zihao and Hu, Shoukang and Wang, Guangcong and Liu, Tianqi and Zang, Yuhang and Cao, Zhiguo and Li, Wei and Liu, Ziwei}, title = {WildAvatar: Learning In-the-wild 3D Avatars from the Web}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15963-15975} }
BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xuanpu and Song, Dan and Zhan, Pengxin and Chang, Tianyu and Zeng, Jianhao and Chen, Qingguo and Luo, Weihua and Liu, An-An}, title = {BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26399-26408} }
Rectified Diffusion Guidance for Conditional Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Mengfei and Xue, Nan and Shen, Yujun and Yi, Ran and Gong, Tieliang and Liu, Yong-Jin}, title = {Rectified Diffusion Guidance for Conditional Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13371-13380} }
SceneFactor: Factored Latent 3D Diffusion for Controllable 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bokhovkin_2025_CVPR, author = {Bokhovkin, Aleksey and Meng, Quan and Tulsiani, Shubham and Dai, Angela}, title = {SceneFactor: Factored Latent 3D Diffusion for Controllable 3D Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {628-639} }
HiFi-Portrait: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yifang and Zhai, Benxiang and Sun, Yunzhuo and Li, Ming and Li, Yang and Du, Sidan}, title = {HiFi-Portrait: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5625-5635} }
IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Can and Lee, Gim Hee}, title = {IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12132-12142} }
FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Wonjoon and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2040-2049} }
RAD: Region-Aware Diffusion Models for Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sora and Suh, Sungho and Lee, Minsik}, title = {RAD: Region-Aware Diffusion Models for Image Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2439-2448} }
RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Xin and Yu, Lei and Li, Xin and Tu, Zhijun and Chen, Hanting and Hu, Jie and Chen, Zhibo}, title = {RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12923-12933} }
Supervising Sound Localization by In-the-wild Egomotion-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2025_CVPR, author = {Min, Anna and Chen, Ziyang and Zhao, Hang and Owens, Andrew}, title = {Supervising Sound Localization by In-the-wild Egomotion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23936-23946} }
AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yuheng and Yang, Shijie and Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan}, title = {AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23131-23140} }
Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Wen, Changsong and Huang, Yu and Yang, Menglin and Tang, Feilong and Shen, Wei}, title = {Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4562-4572} }
TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2025_CVPR, author = {Xiong, Bojun and Liu, Jialun and Hu, Jiakui and Wu, Chenming and Wu, Jinbo and Liu, Xing and Zhao, Chen and Ding, Errui and Lian, Zhouhui}, title = {TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {551-561} }
OSV: One Step is Enough for High-Quality Image to Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Xiaofeng and Jiang, Zhengkai and Wang, Fu-yun and Zhang, Jiangning and Chen, Hao and Chi, Mingmin and Wang, Yabiao and Luo, Wenhan}, title = {OSV: One Step is Enough for High-Quality Image to Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12585-12594} }
Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Siyuan and Sun, Yuan and Peng, Dezhong and Liu, Zheng and Song, Xiaomin and Hu, Peng}, title = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20747-20756} }
AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Lingteng and Zhu, Shenhao and Zuo, Qi and Gu, Xiaodong and Dong, Yuan and Zhang, Junfei and Xu, Chao and Li, Zhe and Yuan, Weihao and Bo, Liefeng and Chen, Guanying and Dong, Zilong}, title = {AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21148-21158} }
GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yuchen and Zhao, Shanhui and Yu, Tao and Wen, Hao and Va, Samith and Xu, Mengwei and Li, Yuanchun and Zhang, Chongyang}, title = {GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19477-19486} }
Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Tian and Zhang, Huixin and Parashar, Shubham and Kong, Shu}, title = {Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15086-15097} }
Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Lingyun and Xie, Yu and Fu, Yanwei and Chen, Ping}, title = {Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8172-8181} }
A Regularization-Guided Equivariant Approach for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Yulu and Fu, Jiahong and Xie, Qi and Meng, Deyu}, title = {A Regularization-Guided Equivariant Approach for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2300-2310} }
RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2025_CVPR, author = {Qiao, Yuanjian and Shao, Mingwen and Meng, Lingzhuang and Xu, Kai}, title = {RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11177-11186} }
IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuan and Bai, Ziqian and Tan, Feitong and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda}, title = {IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21107-21116} }
Deep Fair Multi-View Clustering with Attention KAN-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, HaiMing and Wang, Qianqian and Wang, Boyue and Gao, Quanxue}, title = {Deep Fair Multi-View Clustering with Attention KAN}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5061-5070} }
LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xi and Li, Hongzhen and Fang, Heng and Peng, Yichen and Xie, Haoran and Yang, Xi and Li, Chuntao}, title = {LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2912-2923} }
4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chaoyang and Zhuang, Peiye and Ngo, Tuan Duc and Menapace, Willi and Siarohin, Aliaksandr and Vasilkovsky, Michael and Skorokhodov, Ivan and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying}, title = {4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17723-17732} }
DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR, author = {Kumar, Ashish and Rajagopalan, A. N.}, title = {DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21728-21738} }
VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Kangsan and Park, Geon and Lee, Youngwan and Yeo, Woongyeong and Hwang, Sung Ju}, title = {VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3295-3305} }
Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garber_2025_CVPR, author = {Garber, Tomer and Tirer, Tom}, title = {Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2398-2407} }
Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Chaocan and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Li, Ning and Xue, Yuanliang and Song, Shuxiang}, title = {Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6730-6740} }
LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Chuanfu and Wang, Rui and Duan, Lixin and Yu, Shiqi}, title = {LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6627-6636} }
UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yichong and Cai, Yichi and Zhang, Shangzhan and Zhou, Hongyu and Hu, Haoji and Yu, Huimin and Geiger, Andreas and Liao, Yiyi}, title = {UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27519-27530} }
Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Benlin and Dong, Yuhao and Wang, Yiqin and Ma, Zixian and Tang, Yansong and Tang, Luming and Rao, Yongming and Ma, Wei-Chiu and Krishna, Ranjay}, title = {Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3783-3792} }
Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Xu, Jianqing and Zhang, Jingyun and Wang, ShaoMing and Zhao, Yang and Ding, Shouhong and Jia, Wei and Wu, Yunsheng}, title = {Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26367-26376} }
FoundationStereo: Zero-Shot Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Bowen and Trepte, Matthew and Aribido, Joseph and Kautz, Jan and Gallo, Orazio and Birchfield, Stan}, title = {FoundationStereo: Zero-Shot Stereo Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5249-5260} }
Z-Magic: Zero-shot Multiple Attributes Guided Image Creator-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming}, title = {Z-Magic: Zero-shot Multiple Attributes Guided Image Creator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18390-18400} }
UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Shun and Jiang, Jielin and Xu, Xiaolong}, title = {UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9994-10003} }
Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Jiaxin and Xiang, Mingyue and Sun, Hao and Huang, Yixuan and Weng, Zhi}, title = {Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24560-24569} }
On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach-
[pdf]
[bibtex]@InProceedings{Tong_2025_CVPR, author = {Tong, Baoshun and Lai, Hanjiang and Pan, Yan and Yin, Jian}, title = {On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19921-19930} }
Towards General Visual-Linguistic Face Forgery Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Ke and Chen, Shen and Yao, Taiping and Zhou, Ziyin and Ji, Jiayi and Sun, Xiaoshuai and Lin, Chia-Wen and Ji, Rongrong}, title = {Towards General Visual-Linguistic Face Forgery Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19576-19586} }
Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Feng and Ma, Haoyu and He, Zecheng and Hou, Tingbo and Hou, Ji and Li, Kunpeng and Dai, Xiaoliang and Juefei-Xu, Felix and Azadi, Samaneh and Sinha, Animesh and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana}, title = {Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13146-13156} }
LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Tiantian and Zhang, Jinrui and Wang, Qingni and Wang, Teng and Duan, Jinming and Zheng, Feng}, title = {LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18959-18969} }
MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yukang and Fung, Hokit and Xu, Jianjin and Ren, Zeping and Lau, Adela S.M. and Yin, Guosheng and Li, Xiu}, title = {MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26242-26252} }
MoEdit: On Learning Quantity Perception for Multi-object Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yanfeng and Chan, Kahou and Sun, Yue and Lam, Chantong and Tong, Tong and Yu, Zitong and Fu, Keren and Liu, Xiaohong and Tan, Tao}, title = {MoEdit: On Learning Quantity Perception for Multi-object Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2683-2693} }
Seeing More with Less: Human-like Representations in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gizdov_2025_CVPR, author = {Gizdov, Andrey and Ullman, Shimon and Harari, Daniel}, title = {Seeing More with Less: Human-like Representations in Vision Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4408-4417} }
Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jiayu and Ding, Changxing and Tan, Wentao and Wang, Junhong and Tao, Jin and Xu, Xiangmin}, title = {Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9220-9230} }
Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shiyu and Wang, Zhenting and Juefei-Xu, Felix and Xia, Xide and Liu, Miao and Wang, Xiaofang and Liang, Mingfu and Zhang, Ning and Metaxas, Dimitris N. and Yu, Licheng}, title = {Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29869-29879} }
Matrix-Free Shared Intrinsics Bundle Adjustment-
[pdf]
[bibtex]@InProceedings{Safari_2025_CVPR, author = {Safari, Daniel}, title = {Matrix-Free Shared Intrinsics Bundle Adjustment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27017-27026} }
AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Datao and Cao, Xiangyong and Wu, Xuan and Li, Jialin and Yao, Jing and Bai, Xueru and Jiang, Dongsheng and Li, Yin and Meng, Deyu}, title = {AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3614-3624} }
Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jiange and Zhu, Haoyi and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin}, title = {Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6960-6970} }
Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhihe and Luo, Xufang and Han, Dongqi and Xu, Yunjian and Li, Dongsheng}, title = {Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10610-10620} }
Style Quantization for Data-Efficient GAN Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jian and Lan, Xin and Zhou, Jizhe and Tian, Yuxin and Lv, Jiancheng}, title = {Style Quantization for Data-Efficient GAN Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7696-7706} }
Localizing Events in Videos with Multimodal Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Gengyuan and Fok, Mang Ling Ada and Ma, Jialu and Xia, Yan and Cremers, Daniel and Torr, Philip and Tresp, Volker and Gu, Jindong}, title = {Localizing Events in Videos with Multimodal Queries}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3339-3351} }
PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Weijie and Tao, Manli and Zhao, Chaoyang and Guo, Haiyun and Dong, Honghui and Tang, Ming and Wang, Jinqiao}, title = {PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6940-6949} }
CleanDIFT: Diffusion Features without Noise-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stracke_2025_CVPR, author = {Stracke, Nick and Baumann, Stefan Andreas and Bauer, Kolja and Fundel, Frank and Ommer, Bj{\"o}rn}, title = {CleanDIFT: Diffusion Features without Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {117-127} }
Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hoogeboom_2025_CVPR, author = {Hoogeboom, Emiel and Mensink, Thomas and Heek, Jonathan and Lamerigts, Kay and Gao, Ruiqi and Salimans, Tim}, title = {Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18062-18071} }
Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xiaolu and Yang, Ruizi and Wang, Song and Li, Wentong and Chen, Junbo and Zhu, Jianke}, title = {Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22359-22368} }
STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection-
[pdf]
[supp]
[bibtex]@InProceedings{Velayudhan_2025_CVPR, author = {Velayudhan, Divya and Ahmed, Abdelfatah and Alansari, Mohamad and Gour, Neha and Behouch, Abderaouf and Hassan, Taimur and Wasim, Syed Talal and Maalej, Nabil and Naseer, Muzammal and Gall, Juergen and Bennamoun, Mohammed and Damiani, Ernesto and Werghi, Naoufel}, title = {STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20767-20777} }
Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lei and Li, Senmao and Yang, Fei and Wang, Jianye and Zhang, Ziheng and Liu, Yuhan and Wang, Yaxing and Yang, Jian}, title = {Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12880-12890} }
MAD: Memory-Augmented Detection of 3D Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Agro_2025_CVPR, author = {Agro, Ben and Casas, Sergio and Wang, Patrick and Gilles, Thomas and Urtasun, Raquel}, title = {MAD: Memory-Augmented Detection of 3D Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1449-1460} }
Doppelgangers and Adversarial Vulnerability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kamberov_2025_CVPR, author = {Kamberov, George}, title = {Doppelgangers and Adversarial Vulnerability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10244-10254} }
Complexity Experts are Task-Discriminative Learners for Any Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zamfir_2025_CVPR, author = {Zamfir, Eduard and Wu, Zongwei and Mehta, Nancy and Tan, Yuedong and Paudel, Danda Pani and Zhang, Yulun and Timofte, Radu}, title = {Complexity Experts are Task-Discriminative Learners for Any Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12753-12763} }
Generative Omnimatte: Learning to Decompose Video into Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Yao-Chih and Lu, Erika and Rumbley, Sarah and Geyer, Michal and Huang, Jia-Bin and Dekel, Tali and Cole, Forrester}, title = {Generative Omnimatte: Learning to Decompose Video into Layers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12522-12532} }
5%>100%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Dongshuo and Hu, Leiyi and Li, Bin and Zhang, Youqun and Yang, Xue}, title = {5\%\ensuremath{>}100\%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20071-20081} }
Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Santra_2025_CVPR, author = {Santra, Sanchayan and Chudasama, Vishal and Wasnik, Pankaj and Balasubramanian, Vineeth N}, title = {Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3163-3172} }
Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Wenbing and Wang, Lidong and Zhou, Ziqing and Wang, Chengjie and Pan, Yurui and Zhang, Ruoyi and Chen, Zhuhao and Cheng, Linjie and Gao, Bin-Bin and Zhang, Jiangning and Gan, Zhenye and Wang, Yuxie and Chen, Yulong and Qian, Shuguang and Chi, Mingmin and Peng, Bo and Ma, Lizhuang}, title = {Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15214-15223} }
Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Qinghe and Zhang, Jian and Li, Zekun and Qi, Lei and Yu, Qian and Shi, Yinghuan}, title = {Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5175-5185} }
ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Afane_2025_CVPR, author = {Afane, Mohamed and Ebbrecht, Gabrielle and Wang, Ying and Chen, Juntao and Farooq, Junaid}, title = {ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20427-20436} }
Color Alignment in Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shum_2025_CVPR, author = {Shum, Ka Chun and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit}, title = {Color Alignment in Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28446-28455} }
LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reilly_2025_CVPR, author = {Reilly, Dominick and Chakraborty, Rajatsubhra and Sinha, Arkaprava and Govind, Manish Kumar and Wang, Pu and Bremond, Francois and Xue, Le and Das, Srijan}, title = {LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24297-24308} }
Language-Guided Salient Object Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Fang and Liu, Yuhao and Xu, Ke and Ye, Shuquan and Hancke, Gerhard Petrus and Lau, Rynson W. H.}, title = {Language-Guided Salient Object Ranking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29803-29813} }
Decoupled Motion Expression Video Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Hao and Cong, Runmin and Lu, Xiankai and Zhou, Xiaofei and Kwong, Sam and Zhang, Wei}, title = {Decoupled Motion Expression Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13821-13831} }
K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2025_CVPR, author = {Ouyang, Ziheng and Li, Zhen and Hou, Qibin}, title = {K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13041-13050} }
Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Yue-Hua and Huang, Tai-Ming and Hua, Kai-Lung and Chen, Jun-Cheng}, title = {Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22995-23005} }
WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zongjian and Lin, Bin and Ye, Yang and Chen, Liuhan and Cheng, Xinhua and Yuan, Shenghai and Yuan, Li}, title = {WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17778-17788} }
SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shijia and Xia, Qiming and Guo, Xusheng and Zou, Pufan and Zheng, Maoji and Wu, Hai and Wen, Chenglu and Wang, Cheng}, title = {SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29374-29384} }
ARKit LabelMaker: A New Scale for Indoor 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Guangda and Weder, Silvan and Engelmann, Francis and Pollefeys, Marc and Blum, Hermann}, title = {ARKit LabelMaker: A New Scale for Indoor 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4398-4407} }
VoCo-LLaMA: Towards Vision Compression with Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Xubing and Gan, Yukang and Huang, Xiaoke and Ge, Yixiao and Tang, Yansong}, title = {VoCo-LLaMA: Towards Vision Compression with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29836-29846} }
StreamingT2V: Consistent, Dynamic, and Extendable Long Video Generation from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Henschel_2025_CVPR, author = {Henschel, Roberto and Khachatryan, Levon and Poghosyan, Hayk and Hayrapetyan, Daniil and Tadevosyan, Vahram and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey}, title = {StreamingT2V: Consistent, Dynamic, and Extendable Long Video Generation from Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2568-2577} }
Focal Split: Untethered Snapshot Depth from Differential Defocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Junjie and Mamish, John and Fu, Alan and Concannon, Thomas and Hester, Josiah and Alexander, Emma and Guo, Qi}, title = {Focal Split: Untethered Snapshot Depth from Differential Defocus}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26965-26974} }
AFL: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Run and Tong, Kai and Fang, Di and Sun, Han and Zeng, Ziqian and Li, Haoran and Chen, Tianyi and Zhuang, Huiping}, title = {AFL: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4988-4998} }
XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Fengxiang and Wang, Hongzhen and Guo, Zonghao and Wang, Di and Wang, Yulin and Chen, Mingshuo and Ma, Qiang and Lan, Long and Yang, Wenjing and Zhang, Jing and Liu, Zhiyuan and Sun, Maosong}, title = {XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14325-14336} }
BOLT: Boost Large Vision-Language Model Without Training for Long-form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Shuming and Zhao, Chen and Xu, Tianqi and Ghanem, Bernard}, title = {BOLT: Boost Large Vision-Language Model Without Training for Long-form Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3318-3327} }
Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berisha_2025_CVPR, author = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul}, title = {Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20082-20091} }
Reference-Based 3D-Aware Image Editing with Triplanes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bilecen_2025_CVPR, author = {Bilecen, Bahri Batuhan and Yalin, Yigit and Yu, Ning and Dundar, Aysegul}, title = {Reference-Based 3D-Aware Image Editing with Triplanes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5904-5915} }
StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Ruojun and Xi, Weijie and Wang, XiaoDi and Mao, Yongbo and Cheng, Zach}, title = {StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18260-18269} }
PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Zekai and Hu, Yufan and Fan, Bin and Liu, Hongmin}, title = {PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22089-22098} }
One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Yuchen and Yuan, Quan and Luo, Guiyang and Fu, Xiaoyuan and Li, Yang and Zhu, Xuanhan and Luo, Tianyou and Chen, Siheng and Li, Jinglin}, title = {One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1592-1601} }
Towards All-in-One Medical Image Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Yuan and Ji, Kaiyuan and Zhang, Rongzhao and Jiang, Yankai and Li, Chunyi and Wang, Xiaosong and Zhai, Guangtao}, title = {Towards All-in-One Medical Image Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30774-30786} }
SegAgent: Exploring Pixel Understanding Capabilities in MLLMs by Imitating Human Annotator Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Muzhi and Tian, Yuzhuo and Chen, Hao and Zhou, Chunluan and Guo, Qingpei and Liu, Yang and Yang, Ming and Shen, Chunhua}, title = {SegAgent: Exploring Pixel Understanding Capabilities in MLLMs by Imitating Human Annotator Trajectories}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3686-3696} }
Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kenkun and Fu, Yurong and Yuan, Weihao and Lin, Jing and Li, Peihao and Gu, Xiaodong and Qiu, Lingteng and Wang, Haoqian and Dong, Zilong and Han, Xiaoguang}, title = {Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17529-17539} }
SceneCrafter: Controllable Multi-View Driving Scene Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zehao and Zou, Yuliang and Jiang, Chiyu Max and Sun, Bo and Casser, Vincent and Huang, Xiukun and Wang, Jiahao and Yang, Zhenpei and Gao, Ruiqi and Guibas, Leonidas and Tan, Mingxing and Anguelov, Dragomir}, title = {SceneCrafter: Controllable Multi-View Driving Scene Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6812-6822} }
AMO Sampler: Enhancing Text Rendering with Overshooting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Xixi and Xu, Keyang and Liu, Bo and Liu, Qiang and Fei, Hongliang}, title = {AMO Sampler: Enhancing Text Rendering with Overshooting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13157-13166} }
ImViD: Immersive Volumetric Videos for Enhanced VR Engagement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhengxian and Pan, Shi and Wang, Shengqi and Wang, Haoxiang and Lin, Li and Li, Guanjun and Wen, Zhengqi and Lin, Borong and Tao, Jianhua and Yu, Tao}, title = {ImViD: Immersive Volumetric Videos for Enhanced VR Engagement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16554-16564} }
Integral Fast Fourier Color Constancy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Wenjun and Qian, Yanlin and Chen, Huaian and Dai, Junkang and Jin, Yi}, title = {Integral Fast Fourier Color Constancy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26420-26429} }
I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2025_CVPR, author = {Gui, Dongnan and Guo, Xun and Zhou, Wengang and Lu, Yan}, title = {I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12595-12604} }
Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs-
[pdf]
[supp]
[bibtex]@InProceedings{Victorica_2025_CVPR, author = {Victorica, Mauricio Byrd and D\'an, Gy\"orgy and Sandberg, Henrik}, title = {Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20360-20369} }
HeatFormer: A Neural Optimizer for Multiview Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsubara_2025_CVPR, author = {Matsubara, Yuto and Nishino, Ko}, title = {HeatFormer: A Neural Optimizer for Multiview Human Mesh Recovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6415-6424} }
ResCLIP: Residual Attention for Training-free Dense Vision-language Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuhang and Deng, Jinhong and Li, Wen and Duan, Lixin}, title = {ResCLIP: Residual Attention for Training-free Dense Vision-language Inference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29968-29978} }
GPS as a Control Signal for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Chao and Chen, Ziyang and Holynski, Aleksander and Efros, Alexei A. and Owens, Andrew}, title = {GPS as a Control Signal for Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2766-2778} }
CPath-Omni: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yuxuan and Si, Yixuan and Zhu, Chenglu and Gong, Xuan and Zhang, Kai and Chen, Pingyi and Zhang, Ye and Shui, Zhongyi and Lin, Tao and Yang, Lin}, title = {CPath-Omni: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10360-10371} }
OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation-
[pdf]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Xiao and Qin, Yulei and Zhou, Wengang and Li, Hongsheng and Li, Houqiang}, title = {OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15245-15254} }
MAGiC-SLAM: Multi-Agent Gaussian Globally Consistent SLAM-
[pdf]
[bibtex]@InProceedings{Yugay_2025_CVPR, author = {Yugay, Vladimir and Gevers, Theo and Oswald, Martin R.}, title = {MAGiC-SLAM: Multi-Agent Gaussian Globally Consistent SLAM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6741-6750} }
Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Rui and Ding, Shuangrui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi}, title = {Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24045-24055} }
NTClick: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyi and Liu, Ting and Qu, Xiaochao and Liu, Luoqi and Zhao, Yao and Wei, Yunchao}, title = {NTClick: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8921-8930} }
Show and Segment: Universal Medical Image Segmentation via In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Yunhe and Liu, Di and Li, Zhuowei and Li, Yunsheng and Chen, Dongdong and Zhou, Mu and Metaxas, Dimitris N.}, title = {Show and Segment: Universal Medical Image Segmentation via In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20830-20840} }
MVGenMaster: Scaling Multi-View Generation from Any Image via 3D Priors Enhanced Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Chenjie and Yu, Chaohui and Liu, Shang and Wang, Fan and Xue, Xiangyang and Fu, Yanwei}, title = {MVGenMaster: Scaling Multi-View Generation from Any Image via 3D Priors Enhanced Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6045-6056} }
CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Cheng and Wei, Jiacheng and Chen, Tianrun and Zhang, Chi and Yang, Xiaofeng and Zhang, Shangzhan and Yang, Bingchen and Foo, Chuan-Sheng and Lin, Guosheng and Huang, Qixing and Liu, Fayao}, title = {CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11073-11082} }
Bayesian Test-Time Adaptation for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Lihua and Ye, Mao and Li, Shuaifeng and Li, Nianxin and Zhu, Xiatian and Deng, Lei and Liu, Hongbin and Lei, Zhen}, title = {Bayesian Test-Time Adaptation for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29999-30009} }
Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2025_CVPR, author = {Alzayer, Hadi and Henzler, Philipp and Barron, Jonathan T. and Huang, Jia-Bin and Srinivasan, Pratul P. and Verbin, Dor}, title = {Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10933-10942} }
Causal Composition Diffusion Model for Closed-loop Traffic Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Haohong and Huang, Xin and Phan, Tung and Hayden, David and Zhang, Huan and Zhao, Ding and Srinivasa, Siddhartha and Wolff, Eric and Chen, Hongge}, title = {Causal Composition Diffusion Model for Closed-loop Traffic Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27542-27552} }
Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Duowang and Huang, Xiaohu and Huang, Haiyan and Zhou, Hao and Shao, Zhenfeng}, title = {Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24011-24022} }
DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xin and Gong, Dong}, title = {DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13220-13230} }
HiMoR: Monocular Deformable Gaussian Reconstruction with Hierarchical Motion Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yiming and Xu, Tianhan and Kikuchi, Yuta}, title = {HiMoR: Monocular Deformable Gaussian Reconstruction with Hierarchical Motion Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {886-895} }
GENIUS: A Generative Framework for Universal Multimodal Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sungyeon and Zhu, Xinliang and Lin, Xiaofan and Bastan, Muhammet and Gray, Douglas and Kwak, Suha}, title = {GENIUS: A Generative Framework for Universal Multimodal Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19659-19669} }
Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Junyi and Huang, Yan and Gao, Min and Niu, Yuzhen and Chen, Yuzhong and Wu, Qiang}, title = {Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9570-9579} }
SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boss_2025_CVPR, author = {Boss, Mark and Huang, Zixuan and Vasishta, Aaryaman and Jampani, Varun}, title = {SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16240-16250} }
HSI-GPT: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuan and Li, Yali and Li, Xiang and Wang, Shengjin}, title = {HSI-GPT: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7147-7157} }
Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haoyu and Wang, Le and Zhou, Sanping and Tian, Jingyi and Qin, Zheng and Wang, Yabing and Hua, Gang and Tang, Wei}, title = {Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13350-13360} }
Vid2Avatar-Pro: Authentic Avatar from Videos in the Wild via Universal Prior-
[pdf]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Chen and Li, Junxuan and Kant, Yash and Sheikh, Yaser and Saito, Shunsuke and Cao, Chen}, title = {Vid2Avatar-Pro: Authentic Avatar from Videos in the Wild via Universal Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5559-5570} }
RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhipeng and Yu, Wangbo and Cheng, Xinhua and Zhao, Chengshu and Ge, Yunyang and Guo, Mingyi and Yuan, Li and Tian, Yonghong}, title = {RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {574-584} }
Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianyang and Luo, Qianli and Yang, Guowu and Yang, Wenjing and Liu, Weide and Lin, Guosheng and Lv, Fengmao}, title = {Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30291-30300} }
Customized Condition Controllable Generation for Video Soundtrack-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Fan and Ma, Kunsheng and Xu, Changsheng}, title = {Customized Condition Controllable Generation for Video Soundtrack}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23914-23924} }
ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuanwei and Wei, Hui and Jia, Chengyu and Xiao, Ruqi and Ruan, Weijian and Wei, Xingxing and Zhou, Joey Tianyi and Wang, Zheng}, title = {ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21248-21257} }
EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.}, title = {EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14923-14933} }
A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Keyu and Huang, Mengqi and Chen, Zhuowei and Mao, Zhendong}, title = {A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18476-18485} }
ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Athar_2025_CVPR, author = {Athar, Ali and Deng, Xueqing and Chen, Liang-Chieh}, title = {ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19023-19035} }
A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Xuyi and Quan, Yuhui and Xu, Ruotao and Ji, Hui}, title = {A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12731-12741} }
WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Yu and Wang, Jun and Guan, Nan and Xue, Chun Jason}, title = {WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29342-29351} }
Gromov-Wasserstein Problem with Cyclic Symmetry-
[pdf]
[supp]
[bibtex]@InProceedings{Takeda_2025_CVPR, author = {Takeda, Shoichiro and Akagi, Yasunori}, title = {Gromov-Wasserstein Problem with Cyclic Symmetry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21011-21020} }
IRIS: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Chih-Hao and Huang, Jia-Bin and Li, Zhengqin and Dong, Zhao and Richardt, Christian and Li, Tuotuo and Zollh\"ofer, Michael and Kopf, Johannes and Wang, Shenlong and Kim, Changil}, title = {IRIS: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {465-474} }
SimAvatar: Simulation-Ready Avatars with Layered Hair and Clothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xueting and Yuan, Ye and De Mello, Shalini and Daviet, Gilles and Leaf, Jonathan and Macklin, Miles and Kautz, Jan and Iqbal, Umar}, title = {SimAvatar: Simulation-Ready Avatars with Layered Hair and Clothing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26320-26330} }
Test-Time Backdoor Detection for Object Detection Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hangtao and Wang, Yichen and Yan, Shihui and Zhu, Chenyu and Zhou, Ziqi and Hou, Linshan and Hu, Shengshan and Li, Minghui and Zhang, Yanjun and Zhang, Leo Yu}, title = {Test-Time Backdoor Detection for Object Detection Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24377-24386} }
Towards Precise Scaling Laws for Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Yuanyang and Zhao, Yaqi and Zheng, Mingwu and Lin, Ke and Ou, Jiarong and Chen, Rui and Huang, Victor Shea-Jay and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Yin, Baoqun and Zhang, Wentao and Gai, Kun}, title = {Towards Precise Scaling Laws for Video Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18155-18165} }
RoGSplat: Learning Robust Generalizable Human Gaussian Splatting from Sparse Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Junjin and Zhang, Qing and Nie, Yonewei and Zhu, Lei and Zheng, Wei-Shi}, title = {RoGSplat: Learning Robust Generalizable Human Gaussian Splatting from Sparse Multi-View Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5980-5990} }
SDBF: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of DNN Models-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Xiaofan and Li, Shixin and Ma, Xiaojing and Zhu, Bin Benjamin and Zhang, Dongmei and Yu, Linchen}, title = {SDBF: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of DNN Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29278-29287} }
EnliveningGS: Active Locomotion of 3DGS-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Siyuan and Shao, Tianjia and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin}, title = {EnliveningGS: Active Locomotion of 3DGS}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {896-905} }
SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong}, title = {SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16871-16881} }
AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2025_CVPR, author = {Wimbauer, Felix and Chen, Weirong and Muhle, Dominik and Rupprecht, Christian and Cremers, Daniel}, title = {AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16717-16727} }
Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Wen and Wang, Yong and Duan, Guiduo and Zhang, Dongyang and Hu, Xin and Li, Yuan-Fang and He, Tao}, title = {Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3888-3898} }
Distilling Multi-modal Large Language Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hegde_2025_CVPR, author = {Hegde, Deepti and Yasarla, Rajeev and Cai, Hong and Han, Shizhong and Bhattacharyya, Apratim and Mahajan, Shweta and Liu, Litian and Garrepalli, Risheek and Patel, Vishal M. and Porikli, Fatih}, title = {Distilling Multi-modal Large Language Models for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27575-27585} }
Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jinnyeong and Baek, Seung-Hwan}, title = {Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11482-11492} }
Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhaoran and Lu, Peng and Zhang, Anran and Li, Peipei and Li, Xia and Liu, Xuannan and Hu, Yang and Chen, Shiyi and Wang, Liwei and Guo, Wenhao}, title = {Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14411-14421} }
LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Stojnic_2025_CVPR, author = {Stojni\'c, Vladan and Kalantidis, Yannis and Matas, Ji\v{r}{\'\i} and Tolias, Giorgos}, title = {LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9794-9803} }
Towards Efficient Foundation Model for Zero-shot Amodal Segmentation-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaochen and Qiao, Limeng and Chu, Xiangxiang and Ma, Lin and Jiang, Tingting}, title = {Towards Efficient Foundation Model for Zero-shot Amodal Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20254-20264} }
PhysGen3D: Crafting a Miniature Interactive World from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Boyuan and Jiang, Hanxiao and Liu, Shaowei and Gupta, Saurabh and Li, Yunzhu and Zhao, Hao and Wang, Shenlong}, title = {PhysGen3D: Crafting a Miniature Interactive World from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6178-6189} }
Docopilot: Improving Multimodal Models for Document-Level Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Yuchen and Chen, Zhe and Hu, Yusong and Wang, Weiyun and Ye, Shenglong and Shi, Botian and Lu, Lewei and Hou, Qibin and Lu, Tong and Li, Hongsheng and Dai, Jifeng and Wang, Wenhai}, title = {Docopilot: Improving Multimodal Models for Document-Level Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4026-4037} }
Scaling Properties of Diffusion Models For Perceptual Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ravishankar_2025_CVPR, author = {Ravishankar, Rahul and Patel, Zeeshan and Rajasegaran, Jathushan and Malik, Jitendra}, title = {Scaling Properties of Diffusion Models For Perceptual Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12945-12954} }
HD-EPIC: A Highly-Detailed Egocentric Video Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Perrett_2025_CVPR, author = {Perrett, Toby and Darkhalil, Ahmad and Sinha, Saptarshi and Emara, Omar and Pollard, Sam and Parida, Kranti Kumar and Liu, Kaiting and Gatti, Prajwal and Bansal, Siddhant and Flanagan, Kevin and Chalk, Jacob and Zhu, Zhifan and Guerrier, Rhodri and Abdelazim, Fahd and Zhu, Bin and Moltisanti, Davide and Wray, Michael and Doughty, Hazel and Damen, Dima}, title = {HD-EPIC: A Highly-Detailed Egocentric Video Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23901-23913} }
Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Hao and Zhu, Yan and Xiao, Jiayu and Xiao, Tianxiang and Ma, Yike and Zhang, Yucheng and Dai, Feng}, title = {Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14036-14045} }
Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haicheng and Ju, Chen and Lin, Weixiong and Xiao, Shuai and Chen, Mengting and Huang, Yixuan and Liu, Chang and Yao, Mingshuai and Lan, Jinsong and Chen, Ying and Liu, Qingwen and Wang, Yanfeng}, title = {Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29791-29802} }
PolarFree: Polarization-based Reflection-Free Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Mingde and Wang, Menglu and Tam, King-Man and Li, Lingen and Xue, Tianfan and Gu, Jinwei}, title = {PolarFree: Polarization-based Reflection-Free Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10890-10899} }
H-MoRe: Learning Human-centric Motion Representation for Action Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhanbo and Liu, Xiaoming and Kong, Yu}, title = {H-MoRe: Learning Human-centric Motion Representation for Action Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22702-22713} }
Hierarchical Compact Clustering Attention (COCA) for Unsupervised Object-Centric Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kucuksozen_2025_CVPR, author = {Kucuksozen, Can and Yemez, Yucel}, title = {Hierarchical Compact Clustering Attention (COCA) for Unsupervised Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25388-25398} }
Effortless Active Labeling for Long-Term Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Guowei and Ding, Changxing}, title = {Effortless Active Labeling for Long-Term Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25633-25642} }
Leveraging Temporal Cues for Semi-Supervised Multi-View 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jinhyung and Sanghvi, Navyata and Adachi, Hiroki and Shibata, Yoshihisa and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris}, title = {Leveraging Temporal Cues for Semi-Supervised Multi-View 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27401-27412} }
Self-supervised ControlNet with Spatio-Temporal Mamba for Real-world Video Super-resolution-
[pdf]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Shijun and Xu, Jing and Lu, Lijing and Li, Zhihang and Hu, Kai}, title = {Self-supervised ControlNet with Spatio-Temporal Mamba for Real-world Video Super-resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7385-7395} }
LATTE-MV: Learning to Anticipate Table Tennis Hits from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Etaat_2025_CVPR, author = {Etaat, Daniel and Kalaria, Dvij and Rahmanian, Nima and Sastry, S. Shankar}, title = {LATTE-MV: Learning to Anticipate Table Tennis Hits from Monocular Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7115-7124} }
Logits DeConfusion with CLIP for Few-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Shuo and Liu, Fang and Hao, Zehua and Wang, Xinyi and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping},
  title     = {Logits {DeConfusion} with {CLIP} for Few-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25411--25421}
}
Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xudong and Nie, Wenjie and Zhang, Yan and Hu, Runze and Li, Ke and Zheng, Xiawu and Cao, Liujuan},
  title     = {Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2344--2354}
}
Pay Attention to the Foreground in Object-Centric Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Pinzhuo and Yang, Shengjie and Yu, Hang and Kot, Alex},
  title     = {Pay Attention to the Foreground in Object-Centric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30281--30290}
}
2DMamba: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Nguyen, Anh Tien and Han, Xi and Trinh, Vincent Quoc-Huy and Qin, Hong and Samaras, Dimitris and Hosseini, Mahdi S.},
  title     = {{2DMamba}: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3583--3592}
}
Unboxed: Geometrically and Temporally Consistent Video Outpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhongrui and Megaro-Boldini, Martina and Sumner, Robert W. and Djelouah, Abdelaziz},
  title     = {Unboxed: Geometrically and Temporally Consistent Video Outpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7309--7319}
}
K-Sort Arena: Efficient and Reliable Benchmarking for Generative Models via K-wise Human Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhikai and Liu, Xuewen and Fu, Dongrong Joe and Li, Jianquan and Gu, Qingyi and Keutzer, Kurt and Dong, Zhen},
  title     = {{K-Sort Arena}: Efficient and Reliable Benchmarking for Generative Models via {K}-wise Human Preferences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9131--9141}
}
Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Aodi and Zhuang, Liansheng and Long, Xiao and Yao, Minghong and Wang, Shafei},
  title     = {Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15349--15359}
}
MultimodalStudio: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lincetto_2025_CVPR,
  author    = {Lincetto, Federico and Agresti, Gianluca and Rossi, Mattia and Zanuttigh, Pietro},
  title     = {{MultimodalStudio}: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10964--10973}
}
Dense-SfM: Structure from Motion with Dense Consistent Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, JongMin and Yoo, Sungjoo},
  title     = {{Dense-SfM}: Structure from Motion with Dense Consistent Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6404--6414}
}
FluidNexus: 3D Fluid Reconstruction and Prediction from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yue and Yu, Hong-Xing and Zhu, Bo and Wu, Jiajun},
  title     = {{FluidNexus}: {3D} Fluid Reconstruction and Prediction from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26091--26101}
}
MuTri: Multi-view Tri-alignment for OCT to OCTA 3D Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhuangzhuang and Wang, Hualiang and Ou, Chubin and Li, Xiaomeng},
  title     = {{MuTri}: Multi-view Tri-alignment for {OCT} to {OCTA} {3D} Image Translation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20885--20894}
}
Sketchy Bounding-box Supervision for 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Qian and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Sketchy Bounding-box Supervision for {3D} Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8879--8888}
}
Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Wenhao and Zhou, Mingliang and Chen, Yu and Wei, Xuekai and Feng, Yong and Pu, Huayan and Jia, Weijia},
  title     = {Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17990--17999}
}
Pos3R: 6D Pose Estimation for Unseen Objects Made Easy-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Weijian and Campbell, Dylan and Sun, Chunyi and Zhang, Jiahao and Kanitkar, Shubham and Shaffer, Matt E. and Gould, Stephen},
  title     = {{Pos3R}: {6D} Pose Estimation for Unseen Objects Made Easy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16818--16828}
}
DeformCL: Learning Deformable Centerline Representation for Vessel Extraction in 3D Medical Image-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ziwei and Zhang, Zhixing and Liu, Yuhang and Zhang, Zhao and Yu, Haojun and Wang, Dong and Wang, Liwei},
  title     = {{DeformCL}: Learning Deformable Centerline Representation for Vessel Extraction in {3D} Medical Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30896--30905}
}
StreetCrafter: Street View Synthesis with Controllable Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunzhi and Xu, Zhen and Lin, Haotong and Jin, Haian and Guo, Haoyu and Wang, Yida and Zhan, Kun and Lang, Xianpeng and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {{StreetCrafter}: Street View Synthesis with Controllable Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {822--832}
}
OCRT: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Luyao and Yuan, Yuxuan and Chen, Chaoqi and Zhang, Zeyu and Huang, Yue and Zhang, Kun},
  title     = {{OCRT}: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25422--25433}
}
SPARS3R: Semantic Prior Alignment and Regularization for Sparse 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yutao and Guo, Yuxiang and Li, Deming and Peng, Cheng},
  title     = {{SPARS3R}: Semantic Prior Alignment and Regularization for Sparse {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26810--26821}
}
VidBot: Learning Generalizable 3D Actions from In-the-Wild 2D Human Videos for Zero-Shot Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hanzhi and Sun, Boyang and Zhang, Anran and Pollefeys, Marc and Leutenegger, Stefan},
  title     = {{VidBot}: Learning Generalizable {3D} Actions from In-the-Wild {2D} Human Videos for Zero-Shot Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27661--27672}
}
Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yuxiang and Fan, Zhenfeng and Zhang, ZhiJie and Zhang, Zhiheng and Xia, Shihong},
  title     = {Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5602--5613}
}
TIMotion: Temporal and Interactive Framework for Efficient Human-Human Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yabiao and Wang, Shuo and Zhang, Jiangning and Fan, Ke and Wu, Jiafu and Xue, Zhucun and Liu, Yong},
  title     = {{TIMotion}: Temporal and Interactive Framework for Efficient Human-Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7169--7178}
}
Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majumder_2025_CVPR,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Pradhan, Reina and Grauman, Kristen},
  title     = {Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29016--29028}
}
RaCFormer: Towards High-Quality 3D Object Detection via Query-based Radar-Camera Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Duan, Yifan and Li, Houqiang and Zhang, Yanyong},
  title     = {{RaCFormer}: Towards High-Quality {3D} Object Detection via Query-based Radar-Camera Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17081--17091}
}
Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiawei and Zhang, Tiantian and Chen, Kai and Dou, Qi},
  title     = {Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8953--8963}
}
Understanding Multi-Task Activities from Single-Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yuhan and Elhamifar, Ehsan},
  title     = {Understanding Multi-Task Activities from Single-Task Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19120--19131}
}
Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinjie and Chen, Ziyi and Yu, Xinlu and Chu, Iek-Heng and Chang, Peng and Xiao, Jing},
  title     = {Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11384--11394}
}
TransPixeler: Advancing Text-to-Video Generation with Transparency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Luozhou and Li, Yijun and Chen, Zhifei and Wang, Jui-Hsien and Zhang, Zhifei and Zhang, He and Lin, Zhe and Chen, Ying-Cong},
  title     = {{TransPixeler}: Advancing Text-to-Video Generation with Transparency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18229--18239}
}
Adaptive Keyframe Sampling for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Xi and Qiu, Jihao and Xie, Lingxi and Tian, Yunjie and Jiao, Jianbin and Ye, Qixiang},
  title     = {Adaptive Keyframe Sampling for Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29118--29128}
}
What's in the Image? A Deep-Dive into the Vision of Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kaduri_2025_CVPR,
  author    = {Kaduri, Omri and Bagon, Shai and Dekel, Tali},
  title     = {What's in the Image? A Deep-Dive into the Vision of Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14549--14558}
}
Person De-reidentification: A Variation-guided Identity Shift Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Yi-Xing and Tang, Yu-Ming and Lin, Kun-Yu and Yang, Qize and Meng, Jingke and Wei, Xihan and Zheng, Wei-Shi},
  title     = {Person De-reidentification: A Variation-guided Identity Shift Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29331--29341}
}
FreeSim: Toward Free-viewpoint Camera Simulation in Driving Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Lue and Zhang, Hao and Wang, Qitai and Li, Hongsheng and Zhang, Zhaoxiang},
  title     = {{FreeSim}: Toward Free-viewpoint Camera Simulation in Driving Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12004--12014}
}
Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Sami_2025_CVPR,
  author    = {Sami, Hasin Us and Sen, Swapneel and Roy-Chowdhury, Amit K. and Krishnamurthy, Srikanth V. and Guler, Basak},
  title     = {Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10224--10234}
}
UPME: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Qihui and Ning, Munan and Liu, Zheyuan and Huang, Yue and Yang, Shuo and Wang, Yanbo and Ye, Jiayi and Chen, Xiao and Song, Yibing and Yuan, Li},
  title     = {{UPME}: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9165--9174}
}
DiGIT: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ho-Joong and Lee, Yearang and Hong, Jung-Ho and Lee, Seong-Whan},
  title     = {{DiGIT}: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24286--24296}
}
MBQ: Modality-Balanced Quantization for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Shiyao and Hu, Yingchun and Ning, Xuefei and Liu, Xihui and Hong, Ke and Jia, Xiaotao and Li, Xiuhong and Yan, Yaqi and Ran, Pei and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Wang, Yu},
  title     = {{MBQ}: Modality-Balanced Quantization for Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4167--4177}
}
Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiuhai and Yang, Jianwei and Wu, Haiping and Li, Dianqi and Gao, Jianfeng and Zhou, Tianyi and Xiao, Bin},
  title     = {{Florence-VL}: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24928--24938}
}
VideoDPO: Omni-Preference Alignment for Video Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Runtao and Wu, Haoyu and Zheng, Ziqiang and Wei, Chen and He, Yingqing and Pi, Renjie and Chen, Qifeng},
  title     = {{VideoDPO}: Omni-Preference Alignment for Video Diffusion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8009--8019}
}
Seq2Time: Sequential Knowledge Transfer for Video LLM Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Andong and Gao, Zhongpai and Choudhuri, Anwesa and Planche, Benjamin and Zheng, Meng and Wang, Bin and Chen, Terrence and Chen, Chen and Wu, Ziyan},
  title     = {{Seq2Time}: Sequential Knowledge Transfer for Video {LLM} Temporal Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13766--13775}
}
GPVK-VL: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunxuan and Fan, Lei and Xing, Xiaoying and Zhou, Jianxiong and Wu, Ying},
  title     = {{GPVK-VL}: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16728--16738}
}
Realistic Test-Time Adaptation of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Maxime and Fuchs, Cl{\'e}ment and De Vleeschouwer, Christophe and Ben Ayed, Ismail},
  title     = {Realistic Test-Time Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25103--25112}
}
SelfSplat: Pose-Free and 3D Prior-Free Generalizable 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Gyeongjin and Yoo, Jisang and Park, Jihyeon and Nam, Seungtae and Im, Hyeonsoo and Shin, Sangheon and Kim, Sangpil and Park, Eunbyung},
  title     = {{SelfSplat}: Pose-Free and {3D} Prior-Free Generalizable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22012--22022}
}
Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Nannan and Shih, Kevin J. and Plummer, Bryan A.},
  title     = {Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21238--21247}
}
Exploring Simple Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Zihang},
  title     = {Exploring Simple Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30221--30230}
}
MP-GUI: Modality Perception with MLLMs for GUI Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ziwei and Chen, Weizhi and Yang, Leyang and Zhou, Sheng and Zhao, Shengchu and Zhan, Hanbei and Jin, Jiongchao and Li, Liangcheng and Shao, Zirui and Bu, Jiajun},
  title     = {{MP-GUI}: Modality Perception with {MLLMs} for {GUI} Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29711--29721}
}
Associative Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuwei and Ochiai, Hideya and Wu, Zhirong and Lin, Stephen and Kanai, Ryota},
  title     = {Associative Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4518--4527}
}
Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Yuchen and Zhao, Zhengyu and Lin, Chenhao and Yang, Bo and Zhou, Lu and Liu, Zhe and Shen, Chao},
  title     = {Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25071--25080}
}
Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gensheng and Chen, Tao and Wang, Yujia and Cai, Xinhao and Shu, Xiangbo and Zhou, Tianfei and Yao, Yazhou},
  title     = {Seeing What Matters: Empowering {CLIP} with Patch Generation-to-Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24862--24872}
}
ChatGarment: Garment Estimation, Generation and Editing via Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Siyuan and Xu, Chenghao and Xiu, Yuliang and Grigorev, Artur and Liu, Zhen and Lu, Cewu and Black, Michael J. and Feng, Yao},
  title     = {{ChatGarment}: Garment Estimation, Generation and Editing via Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2924--2934}
}
Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Sihao and Yang, Yibo and Li, Xiaojie and Clifton, David A. and Ghanem, Bernard},
  title     = {Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20502--20511}
}
RDD: Robust Feature Detector and Descriptor using Deformable Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Gonglin and Fu, Tianwen and Chen, Haiwei and Teng, Wenbin and Xiao, Hanyuan and Zhao, Yajie},
  title     = {{RDD}: Robust Feature Detector and Descriptor using Deformable Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6394--6403}
}
Building Vision Models upon Heat Conduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhaozhi and Liu, Yue and Tian, Yunjie and Liu, Yunfan and Wang, Yaowei and Ye, Qixiang},
  title     = {Building Vision Models upon Heat Conduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9707--9717}
}
GRAPHGPT-O: Synergistic Multimodal Comprehension and Generation on Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yi and Jin, Bowen and Shen, Jiacheng and Ding, Sirui and Tan, Qiaoyu and Han, Jiawei},
  title     = {{GRAPHGPT-O}: Synergistic Multimodal Comprehension and Generation on Graphs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19467--19476}
}
Model Poisoning Attacks to Federated Learning via Multi-Round Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yueqi and Fang, Minghong and Gong, Neil Zhenqiang},
  title     = {Model Poisoning Attacks to Federated Learning via Multi-Round Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15454--15463}
}
TaoAvatar: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jianchuan and Hu, Jingchuan and Wang, Gaige and Jiang, Zhonghua and Zhou, Tiansong and Chen, Zhiwen and Lv, Chengfei},
  title     = {{TaoAvatar}: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10723--10734}
}
Erasing Undesirable Influence in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jing and Le, Trung and Hayat, Munawar and Harandi, Mehrtash},
  title     = {Erasing Undesirable Influence in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28263--28273}
}
LT3SD: Latent Trees for 3D Scene Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Quan and Li, Lei and Nie{\ss}ner, Matthias and Dai, Angela},
  title     = {{LT3SD}: Latent Trees for {3D} Scene Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {650--660}
}
Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Jikang and Yan, Zhiyuan and Zhang, Ying and Hao, Li and Ai, Jiaxin and Zou, Qin and Li, Chen and Wang, Zhongyuan},
  title     = {Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13927--13936}
}
CO-SPY: Combining Semantic and Pixel Features to Detect Synthetic Images by AI-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Siyuan and Lyu, Lingjuan and Wang, Zhenting and Zhang, Xiangyu and Sehwag, Vikash},
  title     = {{CO-SPY}: Combining Semantic and Pixel Features to Detect Synthetic Images by {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13455--13465}
}
Closest Neighbors are Harmful for Lightweight Masked Auto-encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Jian and Hasssan, Ahmed and Yang, Li and Fan, Deliang and Shin, Jinwoo and Seo, Jae-sun},
  title     = {Closest Neighbors are Harmful for Lightweight Masked Auto-encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25230--25239}
}
CraftsMan3D: High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Weiyu and Liu, Jiarui and Yan, Hongyu and Chen, Rui and Liang, Yixun and Chen, Xuelin and Tan, Ping and Long, Xiaoxiao},
  title     = {{CraftsMan3D}: High-fidelity Mesh Generation with {3D} Native Diffusion and Interactive Geometry Refiner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5307--5317}
}
Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Qianli and Ning, Xuefei and Liu, Dongrui and Niu, Li and Zhang, Linfeng},
  title     = {Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23281--23291}
}
GIF: Generative Inspiration for Face Recognition at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ebrahimi_2025_CVPR,
  author    = {Ebrahimi, Saeed and Rahimi, Sahar and Dabouei, Ali and Das, Srinjoy and Dawson, Jeremy M. and Nasrabadi, Nasser M.},
  title     = {{GIF}: Generative Inspiration for Face Recognition at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3528--3539}
}
HELVIPAD: A Real-World Dataset for Omnidirectional Stereo Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zayene_2025_CVPR,
  author    = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi{\`e}re, Charles and Cherkaoui, Salim and Kontouli, Alexandre and Alahi, Alexandre},
  title     = {{HELVIPAD}: A Real-World Dataset for Omnidirectional Stereo Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26975--26984}
}
GENMANIP: LLM-driven Simulation for Generalizable Instruction-Following Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Ning and Chen, Yilun and Yang, Shuai and Chen, Xinyi and Tian, Yang and Li, Hao and Huang, Haifeng and Wang, Hanqing and Wang, Tai and Pang, Jiangmiao},
  title     = {{GENMANIP}: {LLM}-driven Simulation for Generalizable Instruction-Following Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12187--12198}
}
SKDream: Controllable Multi-view and 3D Generation with Arbitrary Skeletons-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yuanyou and Yang, Zongxin and Yang, Yi},
  title     = {{SKDream}: Controllable Multi-view and {3D} Generation with Arbitrary Skeletons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {314--325}
}
Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yikai and Cao, Chenjie and Yu, Junqiu and Fan, Ke and Xue, Xiangyang and Fu, Yanwei},
  title     = {Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23237--23248}
}
Optimus-2: Multimodal Minecraft Agent with Goal-Observation-Action Conditioned Policy-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zaijing and Xie, Yuquan and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Nie, Liqiang},
  title     = {{Optimus-2}: Multimodal {Minecraft} Agent with Goal-Observation-Action Conditioned Policy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9039--9049}
}
Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Xin and Niklaus, Simon and Zhang, Zhoutong and Xia, Zhihao and Guo, Chunle and Yang, Yuting and Chen, Jiawen and Li, Chongyi}, title = {Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2084-2093} }
Practical Solutions to the Relative Pose of Three Calibrated Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tzamos_2025_CVPR, author = {Tzamos, Charalambos and Kocur, Viktor and Ding, Yaqing and Barath, Daniel and Haladova, Zuzana Berger and Sattler, Torsten and Kukelova, Zuzana}, title = {Practical Solutions to the Relative Pose of Three Calibrated Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21913-21923} }
Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Byung Hyun and Lim, Sungjin and Chun, Se Young}, title = {Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18596-18606} }
PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Schmalfuss_2025_CVPR, author = {Schmalfuss, Jenny and Chang, Nadine and VS, Vibashan and Shen, Maying and Bruhn, Andres and Alvarez, Jose M.}, title = {PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25081-25091} }
RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2025_CVPR, author = {Mu, Yao and Chen, Tianxing and Chen, Zanxin and Peng, Shijia and Lan, Zhiqian and Gao, Zeyu and Liang, Zhixuan and Yu, Qiaojun and Zou, Yude and Xu, Mingkun and Lin, Lunkai and Xie, Zhiqiang and Ding, Mingyu and Luo, Ping}, title = {RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27649-27660} }
Population Normalization for Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhuoyao and Yi, Fan and Gong, Peizhu and He, Caitou and Jin, Cheng and Zhang, Weizhong}, title = {Population Normalization for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10214-10223} }
AnimateAnything: Consistent and Controllable Animation for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Guojun and Wang, Chi and Zhang, Rong and Wang, Yikai and Li, Hong and Xu, Weiwei}, title = {AnimateAnything: Consistent and Controllable Animation for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27946-27956} }
PRaDA: Projective Radial Distortion Averaging-
[pdf]
[supp]
[bibtex]@InProceedings{Sinitsyn_2025_CVPR, author = {Sinitsyn, Daniil and H\"arenstam-Nielsen, Linus and Cremers, Daniel}, title = {PRaDA: Projective Radial Distortion Averaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21902-21912} }
GenAssets: Generating in-the-wild 3D Assets in Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ze and Wang, Jingkang and Zhang, Haowei and Manivasagam, Sivabalan and Chen, Yun and Urtasun, Raquel}, title = {GenAssets: Generating in-the-wild 3D Assets in Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22392-22403} }
RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dumitriu_2025_CVPR, author = {Dumitriu, Andrei and Tatui, Florin and Miron, Florin and Ralhan, Aakash and Ionescu, Radu Tudor and Timofte, Radu}, title = {RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3427-3437} }
Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ta_2025_CVPR, author = {Ta, Huu Binh and Nguyen, Duc and Tran, Quyen and Tran, Toan and Pham, Tung}, title = {Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24341-24350} }
Camera Resection from Known Line Pencils and a Radially Distorted Scanline-
[pdf]
[bibtex]@InProceedings{Dibene_2025_CVPR, author = {Dibene, Juan C. and Dunn, Enrique}, title = {Camera Resection from Known Line Pencils and a Radially Distorted Scanline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15843-15851} }
ESCAPE: Equivariant Shape Completion via Anchor Point Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bekci_2025_CVPR, author = {Bekci, Burak and Navab, Nassir and Tombari, Federico and Saleh, Mahdi}, title = {ESCAPE: Equivariant Shape Completion via Anchor Point Encoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6480-6489} }
SPC-GS: Gaussian Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Guibiao and Li, Qing and Bao, Zhenyu and Qiu, Guoping and Liu, Kanglin}, title = {SPC-GS: Gaussian Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11264-11274} }
M3amba: Memory Mamba is All You Need for Whole Slide Image Classification-
[pdf]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Tingting and Jiang, Kui and Xiao, Yi and Zhao, Sicheng and Yao, Hongxun}, title = {M3amba: Memory Mamba is All You Need for Whole Slide Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15601-15610} }
Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Ningli and Qin, Rongjun}, title = {Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6068-6077} }
Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models-
[pdf]
[supp]
[bibtex]@InProceedings{Samira_2025_CVPR, author = {Samira, Daniel and Habler, Edan and Elovici, Yuval and Shabtai, Asaf}, title = {Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9210-9219} }
Redefining <Creative> in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Fu and Xie, Yucheng and Yang, Xu and Wang, Jing and Geng, Xin}, title = {Redefining \ensuremath{<}Creative\ensuremath{>} in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18444-18454} }
FiRe: Fixed-points of Restoration Priors for Solving Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Terris_2025_CVPR, author = {Terris, Matthieu and Kamilov, Ulugbek S. and Moreau, Thomas}, title = {FiRe: Fixed-points of Restoration Priors for Solving Inverse Problems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23185-23194} }
Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiao and Chen, Xin and Zhang, Lihe}, title = {Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10445-10454} }
Temporal Alignment-Free Video Matching for Few-shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, SuBeen and Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil}, title = {Temporal Alignment-Free Video Matching for Few-shot Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5412-5421} }
OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{C_2025_CVPR, author = {C, Mohamad Hassan N and Gupta, Divyam and Singha, Mainak and Rongali, Sai Bhargav and Jha, Ankit and Khan, Muhammad Haris and Banerjee, Biplab}, title = {OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10110-10120} }
Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Jia and Lu, Shuai and Zhang, Weihang and Chen, Fang and Li, Huiqi and Liao, Hongen}, title = {Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20405-20415} }
VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3218-3228} }
CoMBO: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Kai and Zhang, Anqi and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao}, title = {CoMBO: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25667-25676} }
Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jin and Lv, Chenghui and Li, Xian and Dong, Shichao and Li, Huadong and Yao, Kelu and Li, Chao and Shao, Wenqi and Luo, Ping}, title = {Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4233-4245} }
Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Feifei and Zhang, Mi and Sun, Yiming and Yang, Min}, title = {Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13252-13262} }
MirrorVerse: Pushing Diffusion Models to Realistically Reflect the World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhiman_2025_CVPR, author = {Dhiman, Ankit and Shah, Manan and Babu, R Venkatesh}, title = {MirrorVerse: Pushing Diffusion Models to Realistically Reflect the World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11239-11249} }
EAP-GS: Efficient Augmentation of Pointcloud for 3D Gaussian Splatting in Few-shot Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Dongrui and Xing, Yuxiang}, title = {EAP-GS: Efficient Augmentation of Pointcloud for 3D Gaussian Splatting in Few-shot Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16498-16507} }
Empowering Large Language Models with 3D Situation Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Zhihao and Peng, Yibo and Ren, Jinke and Liao, Yinghong and Han, Yatong and Feng, Chun-Mei and Zhao, Hengshuang and Li, Guanbin and Cui, Shuguang and Li, Zhen}, title = {Empowering Large Language Models with 3D Situation Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19435-19445} }
Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Tai D. and Azizpour, Aref and Stamm, Matthew C.}, title = {Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3040-3050} }
EchoTraffic: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zhenghao and Chen, Hao and Xie, Binzhu and Xu, Jiaqi and Guo, Ziyu and Xu, Xuemiao and Hao, Jianye and Fu, Chi-Wing and Hu, Xiaowei and Heng, Pheng-Ann}, title = {EchoTraffic: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19098-19108} }
FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jingqiu and Fan, Lue and Huang, Linjiang and Shi, Xiaoyu and Liu, Si and Zhang, Zhaoxiang and Li, Hongsheng}, title = {FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1549-1558} }
Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Yingji and Li, Zhihao and Chen, Dave Zhenyu and Hong, Lanqing and Xu, Dan}, title = {Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6133-6143} }
Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Junlong and Fu, Bin and Ye, Jin and Wang, Guoan and Li, Tianbin and Wang, Haoyu and Li, Ruoyu and Yao, He and Cheng, Junren and Li, Jingwen and Su, Yanzhou and Zhu, Min and He, Junjun}, title = {Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20841-20851} }
GigaHands: A Massive Annotated Dataset of Bimanual Hand Activities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Rao and Zhang, Dingxi and Jiang, Alex and Fu, Wanjia and Funk, Austin and Ritchie, Daniel and Sridhar, Srinath}, title = {GigaHands: A Massive Annotated Dataset of Bimanual Hand Activities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17461-17474} }
AutoSSVH: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lian_2025_CVPR, author = {Lian, Niu and Li, Jun and Wang, Jinpeng and Luo, Ruisheng and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin}, title = {AutoSSVH: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18881-18890} }
Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cocchi_2025_CVPR, author = {Cocchi, Federico and Moratelli, Nicholas and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9199-9209} }
DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xingyuan and Wang, Zirui and Zou, Yang and Chen, Zhixin and Ma, Jun and Jiang, Zhiying and Ma, Long and Liu, Jinyuan}, title = {DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7534-7544} }
Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junjie and Chen, Weilong and Zuo, Yifan and Fang, Yuming}, title = {Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22035-22044} }
FrugalNeRF: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Chin-Yang and Wu, Chung-Ho and Yeh, Chang-Han and Yen, Shih-Han and Sun, Cheng and Liu, Yu-Lun}, title = {FrugalNeRF: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11227-11238} }
3D-GSW: 3D Gaussian Splatting for Robust Watermarking-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Youngdong and Park, Hyunje and Yang, Feng and Ko, Heeju and Choo, Euijin and Kim, Sangpil}, title = {3D-GSW: 3D Gaussian Splatting for Robust Watermarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5938-5948} }
Pioneering 4-Bit FP Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Maosen and Chen, Pengtao and Yu, Chong and Wen, Yan and Tan, Xudong and Chen, Tao}, title = {Pioneering 4-Bit FP Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18134-18143} }
OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Pengfei and Peng, Xiaopeng and Song, Jiajun and Li, Chuanhao and Xu, Zhaopan and Yang, Yue and Guo, Ziyao and Zhang, Hao and Lin, Yuqi and He, Yefei and Zhao, Lirui and Liu, Shuo and Li, Tianhua and Xie, Yuxuan and Chang, Xiaojun and Qiao, Yu and Shao, Wenqi and Zhang, Kaipeng}, title = {OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {56-66} }
Dual Exposure Stereo for Extended Dynamic Range 3D Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Juhyung and Kim, Jinnyeong and Choi, Seokjun and Lee, Jinwoo and Brucker, Samuel and Bijelic, Mario and Heide, Felix and Baek, Seung-Hwan}, title = {Dual Exposure Stereo for Extended Dynamic Range 3D Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6283-6293} }
Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shvetsova_2025_CVPR, author = {Shvetsova, Nina and Nagrani, Arsha and Schiele, Bernt and Kuehne, Hilde and Rupprecht, Christian}, title = {Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29050-29059} }
Embodied Scene Understanding for Vision Language Models via MetaVQA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Weizhen and Duan, Chenda and Peng, Zhenghao and Liu, Yuxin and Zhou, Bolei}, title = {Embodied Scene Understanding for Vision Language Models via MetaVQA}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22453-22464} }
CompGS: Unleashing 2D Compositionality for Compositional Text-to-3D via Dynamically Optimizing 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR, author = {Ge, Chongjian and Xu, Chenfeng and Ji, Yuanfeng and Peng, Chensheng and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu and Jampani, Varun and Zhan, Wei}, title = {CompGS: Unleashing 2D Compositionality for Compositional Text-to-3D via Dynamically Optimizing 3D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18509-18520} }
Learning Temporally Consistent Video Depth from Video Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Jiahao and Yang, Yuanbo and Zhou, Hongyu and Zhang, Youmin and Shen, Yujun and Guizilini, Vitor and Wang, Yue and Poggi, Matteo and Liao, Yiyi}, title = {Learning Temporally Consistent Video Depth from Video Diffusion Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22841-22852} }
FIRE: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Beilin and Xu, Xuan and Wang, Xin and Zhang, Yufei and You, Weike and Zhou, Linna}, title = {FIRE: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12830-12839} }
Assessing and Learning Alignment of Unimodal Vision and Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Le and Yang, Qian and Agrawal, Aishwarya}, title = {Assessing and Learning Alignment of Unimodal Vision and Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14604-14614} }
Samba: A Unified Mamba-based Framework for General Salient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Jiahao and Fu, Keren and Liu, Xiaohong and Zhao, Qijun}, title = {Samba: A Unified Mamba-based Framework for General Salient Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25314-25324} }
Action Detail Matters: Refining Video Recognition with Local Action Queries-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Mengmeng and Huang, Zeyi and Kong, Xiangjie and Shen, Guojiang and Dai, Guang and Wang, Jingdong and Liu, Yong}, title = {Action Detail Matters: Refining Video Recognition with Local Action Queries}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19132-19142} }
PAVE: Patching and Adapting Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhuoming and Li, Yiquan and Nguyen, Khoi Duc and Zhong, Yiwu and Li, Yin}, title = {PAVE: Patching and Adapting Video Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3306-3317} }
LesionLocator: Zero-Shot Universal Tumor Segmentation and Tracking in 3D Whole-Body Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rokuss_2025_CVPR, author = {Rokuss, Maximilian and Kirchhoff, Yannick and Akbal, Seval and Kovacs, Balint and Roy, Saikat and Ulrich, Constantin and Wald, Tassilo and Rotkopf, Lukas T. and Schlemmer, Heinz-Peter and Maier-Hein, Klaus}, title = {LesionLocator: Zero-Shot Universal Tumor Segmentation and Tracking in 3D Whole-Body Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30872-30885} }
Generative Map Priors for Collaborative BEV Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Jiahui and Gong, Yue and Wang, Luting and Zhang, Shifeng and Zhou, Xu and Liu, Si}, title = {Generative Map Priors for Collaborative BEV Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11919-11928} }
Coherent 3D Portrait Video Reconstruction via Triplane Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shengze and Li, Xueting and Liu, Chao and Chan, Matthew and Stengel, Michael and Fuchs, Henry and De Mello, Shalini and Nagano, Koki}, title = {Coherent 3D Portrait Video Reconstruction via Triplane Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10712-10722} }
Generative Image Layer Decomposition with Visual Effects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jinrui and Liu, Qing and Li, Yijun and Kim, Soo Ye and Pakhomov, Daniil and Ren, Mengwei and Zhang, Jianming and Lin, Zhe and Xie, Cihang and Zhou, Yuyin}, title = {Generative Image Layer Decomposition with Visual Effects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7643-7653} }
AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Mingzhen and Wang, Weining and Li, Gen and Liu, Jiawei and Sun, Jiahui and Feng, Wanquan and Lao, Shanshan and Zhou, Siyu and He, Qian and Liu, Jing}, title = {AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7364-7373} }
ManiVideo: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Youxin and Shao, Ruizhi and Zhang, Jiajun and Tu, Hanzhang and Liu, Yun and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin}, title = {ManiVideo: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12209-12219} }
DOF-GS: Adjustable Depth-of-Field 3D Gaussian Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yujie and Chakravarthula, Praneeth and Chen, Baoquan}, title = {DOF-GS: Adjustable Depth-of-Field 3D Gaussian Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21297-21306} }
The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Daiqing and Zhao, Handong and Shi, Jing and Jenni, Simon and Fan, Yifei and Dernoncourt, Franck and Cohen, Scott and Li, Sheng}, title = {The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24807-24816} }
Revisiting Audio-Visual Segmentation with Vision-Centric Transformer-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shaofei and Ling, Rui and Hui, Tianrui and Li, Hongyu and Zhou, Xu and Zhang, Shifeng and Liu, Si and Hong, Richang and Wang, Meng}, title = {Revisiting Audio-Visual Segmentation with Vision-Centric Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8352-8361} }
Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shuling and Hong, Fa-Ting and Huang, Xiaoshui and Xu, Dan}, title = {Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26232-26241} }
HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Mingzhen and Chu, Fu-Jen and Tekin, Bugra and Liang, Kevin J. and Ma, Haoyu and Wang, Weiyao and Chen, Xingyu and Gleize, Pierre and Xue, Hongfei and Lyu, Siwei and Kitani, Kris and Feiszli, Matt and Tang, Hao}, title = {HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7136-7146} }
GraphI2P: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Bie_2025_CVPR, author = {Bie, Lin and Pan, Shouan and Li, Siqi and Zhao, Yining and Gao, Yue}, title = {GraphI2P: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22161-22171} }
SoftVQ-VAE: Efficient 1-Dimensional Continuous Tokenizer-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hao and Wang, Ze and Li, Xiang and Sun, Ximeng and Chen, Fangyi and Liu, Jiang and Wang, Jindong and Raj, Bhiksha and Liu, Zicheng and Barsoum, Emad}, title = {SoftVQ-VAE: Efficient 1-Dimensional Continuous Tokenizer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28358-28370} }
FedCS: Coreset Selection for Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Chenhe and Xie, Weiying and Li, Daixun and Qin, Haonan and Ye, Hangyu and Fang, Leyuan and Li, Yunsong}, title = {FedCS: Coreset Selection for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15434-15443} }
DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haoyang and Wang, Liang and Wang, Chao and Jiang, Jing and Peng, Yan and Long, Guodong}, title = {DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25623-25632} }
Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Yinghui and Qu, Litao and Zhang, Shizhou and Xu, Di and Yang, Yingkun and Zhang, Yanning}, title = {Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12658-12668} }
AIM-Fair: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zengqun and Liu, Ziquan and Cao, Yu and Gong, Shaogang and Patras, Ioannis}, title = {AIM-Fair: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28748-28758} }
Robust Multi-Object 4D Generation for In-the-wild Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Wen-Hsuan and Ke, Lei and Liu, Jianmeng and Huo, Mingxiao and Tokmakov, Pavel and Fragkiadaki, Katerina}, title = {Robust Multi-Object 4D Generation for In-the-wild Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22067-22077} }
OmniMMI: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuxuan and Wang, Yueqian and Chen, Bo and Wu, Tong and Zhao, Dongyan and Zheng, Zilong}, title = {OmniMMI: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18925-18935} }
SOAP: Vision-Centric 3D Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Hyo-Jun and Koh, Yeong Jun and Kim, Hanul and Kim, Hyunseop and Lee, Yonguk and Lee, Jinu}, title = {SOAP: Vision-Centric 3D Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17145-17154} }
Taxonomy-Aware Evaluation of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Snaebjarnarson_2025_CVPR, author = {Sn{\ae}bjarnarson, V\'esteinn and Du, Kevin and Stoehr, Niklas and Belongie, Serge and Cotterell, Ryan and Lang, Nico and Frank, Stella}, title = {Taxonomy-Aware Evaluation of Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9109-9120} }
Active Event-based Stereo Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jianing and Zhang, Yunjian and Han, Haiqian and Ji, Xiangyang}, title = {Active Event-based Stereo Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {971-981} }
Mono-InternVL: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Gen and Yang, Xue and Dou, Wenhan and Wang, Zhaokai and Liu, Jiawen and Dai, Jifeng and Qiao, Yu and Zhu, Xizhou}, title = {Mono-InternVL: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24960-24971} }
SimVS: Simulating World Inconsistencies for Robust View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Trevithick_2025_CVPR, author = {Trevithick, Alex and Paiss, Roni and Henzler, Philipp and Verbin, Dor and Wu, Rundi and Alzayer, Hadi and Gao, Ruiqi and Poole, Ben and Barron, Jonathan T. and Holynski, Aleksander and Ramamoorthi, Ravi and Srinivasan, Pratul P.}, title = {SimVS: Simulating World Inconsistencies for Robust View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16464-16474} }
FLAVC: Learned Video Compression with Feature Level Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chun and Sun, Heming and Katto, Jiro}, title = {FLAVC: Learned Video Compression with Feature Level Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28019-28028} }
An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Wentao and Wang, Jing and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang}, title = {An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27325-27335} }
From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Chen and Chen, Zhizhou and Xu, Yunzhe and Gu, Enxuan and Li, Jian and Yi, Zili and Wang, Qian and Yang, Jian and Tai, Ying}, title = {From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17935-17946} }
SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thoker_2025_CVPR, author = {Thoker, Fida Mohammad and Jiang, Letian and Zhao, Chen and Ghanem, Bernard}, title = {SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8438-8449} }
Video Language Model Pretraining with Spatio-temporal Masking-
[pdf]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yue and Qi, Zhaobo and Sun, Junshu and Wang, Yaowei and Huang, Qingming and Wang, Shuhui}, title = {Video Language Model Pretraining with Spatio-temporal Masking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8557-8567} }
COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sanghwan and Xiao, Rui and Georgescu, Mariana-Iuliana and Alaniz, Stephan and Akata, Zeynep}, title = {COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14690-14700} }
Lifting Motion to the 3D World via 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiaman and Liu, C. Karen and Wu, Jiajun}, title = {Lifting Motion to the 3D World via 2D Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17518-17528} }
TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xin and Chen, Kai and Zhang, Jiaming and Chen, Jingjing and Ma, Xingjun}, title = {TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19910-19920} }
Active Data Curation Effectively Distills Large-Scale Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Udandarao_2025_CVPR, author = {Udandarao, Vishaal and Parthasarathy, Nikhil and Naeem, Muhammad Ferjad and Evans, Talfan and Albanie, Samuel and Tombari, Federico and Xian, Yongqin and Tonioni, Alessio and Henaff, Olivier J.}, title = {Active Data Curation Effectively Distills Large-Scale Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14422-14437} }
PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Guangshun and Feng, Yuan and Ma, Long and Wang, Chen and Zhou, Yuanfeng and Li, Changjian}, title = {PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27243-27253} }
Your ViT is Secretly an Image Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kerssies_2025_CVPR, author = {Kerssies, Tommie and Cavagnero, Niccol\`o and Hermans, Alexander and Norouzi, Narges and Averta, Giuseppe and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan}, title = {Your ViT is Secretly an Image Segmentation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25303-25313} }
Cross-Rejective Open-Set SAR Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Shasha and Lu, Shiming and Du, Zhaolong and Jiao, Licheng and Gou, Shuiping and Mou, Luntian and Lu, Xuequan and Xiong, Lin and Zhang, Yimeng}, title = {Cross-Rejective Open-Set SAR Image Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23027-23036} }
Synthetic Data is an Elegant GIFT for Continual Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bin and Shi, Wuxuan and Wang, Jinqiao and Ye, Mang}, title = {Synthetic Data is an Elegant GIFT for Continual Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2813-2823} }
SplineGS: Robust Motion-Adaptive Spline for Real-Time Dynamic 3D Gaussians from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jongmin and Bui, Minh-Quan Viet and Bello, Juan Luis Gonzalez and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl}, title = {SplineGS: Robust Motion-Adaptive Spline for Real-Time Dynamic 3D Gaussians from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26866-26875} }
SCSA: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shang_2025_CVPR, author = {Shang, Chunnan and Wang, Zhizhong and Wang, Hongwei and Meng, Xiangming}, title = {SCSA: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13051-13060} }
Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Feng and Zhang, Shiwei and Wang, Xiaofeng and Wei, Yujie and Qiu, Haonan and Zhao, Yuzhong and Zhang, Yingya and Ye, Qixiang and Wan, Fang}, title = {Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7353-7363} }
Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tianyi and Wang, Zichen and Wang, Cong and Shu, Yuanchao and Deng, Ruilong and Cheng, Peng and Chen, Jiming}, title = {Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19230-19240} }
Multi-modal Knowledge Distillation-based Human Trajectory Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Jaewoo and Lee, Seohee and Park, Daehee and Lee, Giwon and Yoon, Kuk-Jin}, title = {Multi-modal Knowledge Distillation-based Human Trajectory Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24222-24233} }
SAM2Object: Consolidating View Consistency via SAM2 for Zero-Shot 3D Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jihuai and Zhuo, Junbao and Chen, Jiansheng and Ma, Huimin}, title = {SAM2Object: Consolidating View Consistency via SAM2 for Zero-Shot 3D Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19325-19334} }
CDI: Copyrighted Data Identification in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dubinski_2025_CVPR, author = {Dubi\'nski, Jan and Kowalczuk, Antoni and Boenisch, Franziska and Dziedzic, Adam}, title = {CDI: Copyrighted Data Identification in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18674-18684} }
Hypergraph Vision Transformers: Images are More than Nodes, More than Edges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fixelle_2025_CVPR, author = {Fixelle, Joshua}, title = {Hypergraph Vision Transformers: Images are More than Nodes, More than Edges}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9751-9761} }
Binarized Neural Network for Multi-spectral Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_CVPR, author = {Hou, Junming and Chen, Xiaoyu and Ran, Ran and Cong, Xiaofeng and Liu, Xinyang and You, Jian Wei and Deng, Liang-Jian}, title = {Binarized Neural Network for Multi-spectral Image Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2236-2245} }
CRISP: Object Pose and Shape Estimation with Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Jingnan and Talak, Rajat and Zhang, Harry and Jin, David and Carlone, Luca}, title = {CRISP: Object Pose and Shape Estimation with Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11644-11653} }
ShiftwiseConv: Small Convolutional Kernel with Large Kernel Effect-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Dachong and Li, Li and Chen, Zhuangzhuang and Li, Jianqiang}, title = {ShiftwiseConv: Small Convolutional Kernel with Large Kernel Effect}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25281-25291} }
GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Zichen and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng and Yang, Hongyu}, title = {GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {348-358} }
Creating Your Editable 3D Photorealistic Avatar with Tetrahedron-constrained Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui}, title = {Creating Your Editable 3D Photorealistic Avatar with Tetrahedron-constrained Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15976-15986} }
FineVQ: Fine-Grained User Generated Content Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Huiyu and Hu, Qiang and Wang, Jiarui and Yang, Liu and Xu, Zitong and Liu, Lu and Min, Xiongkuo and Cai, Chunlei and Ye, Tianxiao and Zhang, Xiaoyun and Zhai, Guangtao}, title = {FineVQ: Fine-Grained User Generated Content Video Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3206-3217} }
Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yexin and Liang, Zhengyang and Wang, Yueze and Wu, Xianfeng and Tang, Feilong and He, Muyang and Li, Jian and Liu, Zheng and Yang, Harry and Lim, Sernam and Zhao, Bo}, title = {Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9087-9097} }
Object-Shot Enhanced Grounding Network for Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Yisen and Zhang, Haoyu and Liu, Meng and Guan, Weili and Nie, Liqiang}, title = {Object-Shot Enhanced Grounding Network for Egocentric Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24190-24200} }
MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation-
[pdf]
[supp]
[bibtex]@InProceedings{Zatsarynna_2025_CVPR, author = {Zatsarynna, Olga and Bahrami, Emad and Abu Farha, Yazan and Francesca, Gianpiero and Gall, Juergen}, title = {MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3438-3448} }
METASCENES: Towards Automated Replica Creation for Real-world 3D Scans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Huangyue and Jia, Baoxiong and Chen, Yixin and Yang, Yandan and Li, Puhao and Su, Rongpeng and Li, Jiaxin and Li, Qing and Liang, Wei and Zhu, Song-Chun and Liu, Tengyu and Huang, Siyuan}, title = {METASCENES: Towards Automated Replica Creation for Real-world 3D Scans}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1667-1679} }
Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Tang, Jiao and Zuo, Yingli and Wan, Peng and Zhang, Daoqiang and Shao, Wei}, title = {Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10384-10393} }
Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahimi_2025_CVPR, author = {Rahimi, Ahmad and Luan, Po-Chien and Liu, Yuejiang and Raji\v{c}, Frano and Alahi, Alexandre}, title = {Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17271-17281} }
Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Hoonhee and Kang, Jae-Young and Kim, Youngho and Yoon, Kuk-Jin}, title = {Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27197-27210} }
Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Quan_2025_CVPR, author = {Quan, Yuhui and Zheng, Tianxiang and Ma, Zhiyuan and Ji, Hui}, title = {Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7502-7512} }
Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2025_CVPR, author = {Ahn, Namhyuk and Yoo, KiYoon and Ahn, Wonhyuk and Kim, Daesik and Nam, Seung-Hun}, title = {Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28801-28810} }
Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Juntae and Hayat, Munawar and Yun, Sungrack}, title = {Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15329-15338} }
The Devil is in Temporal Token: High Quality Video Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yang, Zongxin and Zhang, Pingping and Lu, Huchuan}, title = {The Devil is in Temporal Token: High Quality Video Reasoning Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29183-29192} }
PerLA: Perceptive 3D Language Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Poiesi, Fabio and Wang, Yiming}, title = {PerLA: Perceptive 3D Language Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14369-14379} }
LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Han and Dong, Wei and Chen, Jun}, title = {LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21580-21589} }
PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Qiyao and Yin, Xiangyu and Yang, Boyuan and Gao, Wei}, title = {PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18826-18836} }
Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Hyeonho and Huang, Chun-Hao P. and Ye, Jong Chul and Mitra, Niloy J. and Ceylan, Duygu}, title = {Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7276-7287} }
Mask^2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Tianhao and Yuan, Jianlong and Feng, Wanquan and Fang, Shancheng and Liu, Jiawei and Zhou, SiYu and He, Qian and Xie, Hongtao and Zhang, Yongdong}, title = {Mask{\textasciicircum}2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18837-18846} }
JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Xiaoyong and Du, Songlin}, title = {JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14934-14943} }
DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Keda and Qin, Can and You, Haoxuan and Sui, Yang and Wang, Huan}, title = {DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18992-19001} }
T-FAKE: Synthesizing Thermal Images for Facial Landmarking-
[pdf]
[supp]
[bibtex]@InProceedings{Flotho_2025_CVPR, author = {Flotho, Philipp and Piening, Moritz and Kukleva, Anna and Steidl, Gabriele}, title = {T-FAKE: Synthesizing Thermal Images for Facial Landmarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26356-26366} }
Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Albastaki_2025_CVPR, author = {Albastaki, Shahad and Sohail, Anabia and Ganapathi, Iyyakutti Iyappan and Alawode, Basit and Khan, Asim and Javed, Sajid and Werghi, Naoufel and Bennamoun, Mohammed and Mahmood, Arif}, title = {Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25907-25919} }
InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinlu and Chen, Yixin and Wang, Zan and Yang, Jie and Wang, Yizhou and Huang, Siyuan}, title = {InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7015-7025} }
MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gabeff_2025_CVPR, author = {Gabeff, Valentin and Qi, Haozhe and Flaherty, Brendan and Sumbul, Gencer and Mathis, Alexander and Tuia, Devis}, title = {MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13854-13864} }
Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Yu, Guo and Wang, Fei}, title = {Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15885-15895} }
SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Chi and Ma, Xiaoxuan and Su, Jiajun and Wang, Yizhou}, title = {SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16796-16806} }
PICD: Versatile Perceptual Image Compression with Diffusion Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Tongda and Li, Jiahao and Li, Bin and Wang, Yan and Zhang, Ya-Qin and Lu, Yan}, title = {PICD: Versatile Perceptual Image Compression with Diffusion Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28436-28445} }
UniScene: Unified Occupancy-centric Driving Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Bohan and Guo, Jiazhe and Liu, Hongsi and Zou, Yingshuang and Ding, Yikang and Chen, Xiwu and Zhu, Hu and Tan, Feiyang and Zhang, Chi and Wang, Tiancai and Zhou, Shuchang and Zhang, Li and Qi, Xiaojuan and Zhao, Hao and Yang, Mu and Zeng, Wenjun and Jin, Xin}, title = {UniScene: Unified Occupancy-centric Driving Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11971-11981} }
Wonderland: Navigating 3D Scenes from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Hanwen and Cao, Junli and Goel, Vidit and Qian, Guocheng and Korolev, Sergei and Terzopoulos, Demetri and Plataniotis, Konstantinos N. and Tulyakov, Sergey and Ren, Jian}, title = {Wonderland: Navigating 3D Scenes from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {798-810} }
Learning from Streaming Video with Orthogonal Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Tengda and Gokay, Dilara and Heyward, Joseph and Zhang, Chuhan and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Damen, Dima and Zisserman, Andrew}, title = {Learning from Streaming Video with Orthogonal Gradients}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13651-13660} }
Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Pan and Li, Kaiyu and Cao, Xiangyong and Yao, Jing and Liu, Lei and Bai, Xueru and Zhou, Feng and Meng, Deyu}, title = {Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1527-1537} }
SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation-
[pdf]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Feng and Cao, Jiacheng and Liu, Li and Jiang, Minghua}, title = {SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5197-5206} }
VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gonzalez_2025_CVPR, author = {Gonzalez, Juan Luis and Yao, Xu and Whelan, Alex and Olszewski, Kyle and Kim, Hyeongwoo and Garrido, Pablo}, title = {VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22901-22910} }
Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers-
[pdf]
[supp]
[bibtex]@InProceedings{Guimard_2025_CVPR, author = {Guimard, Quentin and D'Inc\`a, Moreno and Mancini, Massimiliano and Ricci, Elisa}, title = {Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15151-15161} }
Self-Supervised Spatial Correspondence Across Modalities-
[pdf]
[bibtex]@InProceedings{Shrivastava_2025_CVPR, author = {Shrivastava, Ayush and Owens, Andrew}, title = {Self-Supervised Spatial Correspondence Across Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6383-6393} }
MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Ping and Gong, Cheng and Lin, Xi and Liu, Fei and Lu, Zhichao and Zhang, Qingfu and Wang, Zhenkun}, title = {MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5041-5051} }
Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Jie and Chen, Kangrui and Cui, Yingqian and Zeng, Shenglai and Liu, Hui and Xing, Yue and Tang, Jiliang and Lyu, Lingjuan}, title = {Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28769-28778} }
Motion Modes: What Could Happen Next?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pandey_2025_CVPR, author = {Pandey, Karran and Hold-Geoffroy, Yannick and Gadelha, Matheus and Mitra, Niloy J. and Singh, Karan and Guerrero, Paul}, title = {Motion Modes: What Could Happen Next?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2030-2039} }
Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Ziheng and Gu, Jianyang and Chowdhury, Arpita and Mai, Zheda and Carlyn, David and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun}, title = {Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9611-9620} }
The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Benidir_2025_CVPR, author = {Benidir, Yanis and Gonthier, Nicolas and Mallet, Clement}, title = {The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2204-2214} }
Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2025_CVPR, author = {M\"uller, Andreas and Lukovnikov, Denis and Thietke, Jonas and Fischer, Asja and Quiring, Erwin}, title = {Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20937-20946} }
An Image-like Diffusion Method for Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hui_2025_CVPR, author = {Hui, Xiaofei and Qu, Haoxuan and Rahmani, Hossein and Liu, Jun}, title = {An Image-like Diffusion Method for Human-Object Interaction Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14002-14012} }
VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qian and Eldesokey, Abdelrahman and Mendiratta, Mohit and Zhan, Fangneng and Kortylewski, Adam and Theobalt, Christian and Wonka, Peter}, title = {VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22985-22994} }
COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiaxin and Jiang, Junjun and Chen, Youyu and Jiang, Kui and Liu, Xianming}, title = {COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19335-19344} }
Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Xiangfeng and Zhang, Pinyi and Huang, Wenxuan and Shen, Yunhang and Chen, Haosheng and Lin, Jingzhong and Li, Wei and He, Gaoqi and Xie, Jiao and Lin, Shaohui}, title = {Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9829-9838} }
RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khosla_2025_CVPR, author = {Khosla, Savya and V, Sethuraman T and Schwing, Alexander and Hoiem, Derek}, title = {RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3697-3706} }
PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Dong Kyu and Hwang, Inwoo and Lee, Sanghack}, title = {PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15360-15370} }
HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Griffiths_2025_CVPR, author = {Griffiths, Ethan and Haghighat, Maryam and Denman, Simon and Fookes, Clinton and Ramezani, Milad}, title = {HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6648-6658} }
Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi}, title = {Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20492-20501} }
UniK3D: Universal Camera Monocular 3D Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piccinelli_2025_CVPR, author = {Piccinelli, Luigi and Sakaridis, Christos and Segu, Mattia and Yang, Yung-Hsu and Li, Siyuan and Abbeloos, Wim and Van Gool, Luc}, title = {UniK3D: Universal Camera Monocular 3D Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1028-1039} }
ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Jiayi and Yin, Zijin and Hua, Changcheng and Peng, Yuxin and Liang, Kongming and Ma, Zhanyu and Guo, Jun and Liu, Yang}, title = {ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7191-7200} }
VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chi-Pin and Wu, Yen-Siang and Chung, Hung-Kai and Chang, Kai-Po and Yang, Fu-En and Wang, Yu-Chiang Frank}, title = {VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17603-17612} }
AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Huy and Nguyen, Kien and Pemasiri, Akila and Liu, Feng and Sridharan, Sridha and Fookes, Clinton}, title = {AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1241-1251} }
EBS-EKF: Accurate and High Frequency Event-based Star Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Reed_2025_CVPR, author = {Reed, Albert W. and Hashemi, Connor and Melamed, Dennis and Menon, Nitesh and Hirakawa, Keigo and McCloskey, Scott}, title = {EBS-EKF: Accurate and High Frequency Event-based Star Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6510-6519} }
PersonaBooth: Personalized Text-to-Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Boeun and Jeong, Hea In and Sung, JungHoon and Cheng, Yihua and Lee, Jeongmin and Chang, Ju Yong and Choi, Sang-Il and Choi, Younggeun and Shin, Saim and Kim, Jungho and Chang, Hyung Jin}, title = {PersonaBooth: Personalized Text-to-Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22756-22765} }
Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Al-Emadi_2025_CVPR, author = {Al-Emadi, Sara A. and Yang, Yin and Ofli, Ferda}, title = {Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8299-8309} }
SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Mingjin and Li, Xiaolong and Gao, Fei and Guo, Jie and Gao, Xinbo and Zhang, Jing}, title = {SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9549-9558} }
Star with Bilinear Mapping-
[pdf]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zelin and Huang, Yu and Xu, Zhengqin and Tang, Feilong and Hu, Ming and Yang, Xiaokang and Shen, Wei}, title = {Star with Bilinear Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25292-25302} }
Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhejun and Karkus, Peter and Igl, Maximilian and Ding, Wenhao and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco}, title = {Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5422-5432} }
DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jay Zhangjie and Zhang, Yuxuan and Turki, Haithem and Ren, Xuanchi and Gao, Jun and Shou, Mike Zheng and Fidler, Sanja and Gojcic, Zan and Ling, Huan}, title = {DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26024-26035} }
Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Runfeng and Okunev, Mikhail and Guo, Zixuan and Duong, Anh Ha and Richardt, Christian and O'Toole, Matthew and Tompkin, James}, title = {Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21021-21030} }
Align3R: Aligned Monocular Depth Estimation for Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Jiahao and Huang, Tianyu and Li, Peng and Dou, Zhiyang and Lin, Cheng and Cui, Zhiming and Dong, Zhen and Yeung, Sai-Kit and Wang, Wenping and Liu, Yuan}, title = {Align3R: Aligned Monocular Depth Estimation for Dynamic Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22820-22830} }
Compositional Caching for Training-free Open-vocabulary Attribute Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garosi_2025_CVPR, author = {Garosi, Marco and Conti, Alessandro and Liu, Gaowen and Ricci, Elisa and Mancini, Massimiliano}, title = {Compositional Caching for Training-free Open-vocabulary Attribute Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15098-15107} }
Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition-
[pdf]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Wuyou and Jia, Guoli and Zhao, Sicheng and Yang, Jufeng}, title = {Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29601-29611} }
CoLLM: A Large Language Model for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2025_CVPR, author = {Huynh, Chuong and Yang, Jinyu and Tawari, Ashish and Shah, Mubarak and Tran, Son and Hamid, Raffay and Chilimbi, Trishul and Shrivastava, Abhinav}, title = {CoLLM: A Large Language Model for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3994-4004} }
Anomize: Better Open Vocabulary Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Fei and Liu, Wenxuan and Chen, Jingjing and Zhang, Ruixu and Wang, Yuran and Zhong, Xian and Wang, Zheng}, title = {Anomize: Better Open Vocabulary Video Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29203-29212} }
Efficient Diffusion as Low Light Enhancer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lan_2025_CVPR, author = {Lan, Guanzhou and Ma, Qianli and Yang, Yuqi and Wang, Zhigang and Wang, Dong and Li, Xuelong and Zhao, Bin}, title = {Efficient Diffusion as Low Light Enhancer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21277-21286} }
GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Guangyan and Cui, Te and Wang, Meiling and Yang, Chengcai and Hu, Mengxiao and Lu, Haoyang and Mu, Yao and Peng, Zicai and Zhou, Tianxing and Jiang, Xinran and Yang, Yi and Yue, Yufeng}, title = {GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1756-1768} }
VI^3NR: Variance Informed Initialization for Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Koneputugodage_2025_CVPR, author = {Koneputugodage, Chamin Hewa and Ben-Shabat, Yizhak and Ramasinghe, Sameera and Gould, Stephen}, title = {VI{\textasciicircum}3NR: Variance Informed Initialization for Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13477-13486} }
MMVU: Measuring Expert-Level Multi-Discipline Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yilun and Zhang, Haowei and Xie, Lujing and Hu, Tongyan and Gan, Guo and Long, Yitao and Hu, Zhiyuan and Chen, Weiyuan and Li, Chuhan and Xu, Zhijian and Wang, Chengye and Shangguan, Ziyao and Liang, Zhenwen and Liu, Yixin and Zhao, Chen and Cohan, Arman}, title = {MMVU: Measuring Expert-Level Multi-Discipline Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8475-8489} }
M-LLM Based Video Frame Selection for Efficient Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Kai and Gao, Feng and Nie, Xiaohan and Zhou, Peng and Tran, Son and Neiman, Tal and Wang, Lingyun and Shah, Mubarak and Hamid, Raffay and Yin, Bing and Chilimbi, Trishul}, title = {M-LLM Based Video Frame Selection for Efficient Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13702-13712} }
Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sidhu_2025_CVPR, author = {Sidhu, Mankeerat and Chopra, Hetarth and Blume, Ansel and Kim, Jeonghwan and Reddy, Revanth Gangi and Ji, Heng}, title = {Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15129-15138} }
EgoLM: Multi-Modal Language Model of Egocentric Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Fangzhou and Guzov, Vladimir and Kim, Hyo Jin and Ye, Yuting and Newcombe, Richard and Liu, Ziwei and Ma, Lingni}, title = {EgoLM: Multi-Modal Language Model of Egocentric Motions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5344-5354} }
Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhuoman and Ye, Weicai and Luximon, Yan and Wan, Pengfei and Zhang, Di}, title = {Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11016-11025} }
Diffusion Model is Effectively Its Own Teacher-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Xinyin and Yu, Runpeng and Liu, Songhua and Fang, Gongfan and Wang, Xinchao}, title = {Diffusion Model is Effectively Its Own Teacher}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12901-12911} }
Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Xin and Cai, Yuxuan and Wang, Qiuyue and Zhou, Yuan and Huang, Wenhao and Yang, Huan}, title = {Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3184-3194} }
Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Junxian and Chen, Minheng and Ke, Xinyi and Xun, Tianwang and Jiang, Xiaoming and Zhou, Hongyu and Shao, Lizhi and Kong, Youyong}, title = {Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5144-5153} }
HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pilligua_2025_CVPR, author = {Pilligua, Maria and Xue, Danna and Vazquez-Corral, Javier}, title = {HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22933-22942} }
UnCommon Objects in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingchen and Tayal, Piyush and Wang, Jianyuan and Zarzar, Jesus and Monnier, Tom and Tertikas, Konstantinos and Duan, Jiali and Toisoul, Antoine and Zhang, Jason Y. and Neverova, Natalia and Vedaldi, Andrea and Shapovalov, Roman and Novotny, David}, title = {UnCommon Objects in 3D}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14102-14113} }
Disentangled Pose and Appearance Guidance for Multi-Pose Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Tengfei and Wu, Yue and Li, Yuelong and Qin, Can and Gong, Maoguo and Miao, Qiguang and Ma, Wenping}, title = {Disentangled Pose and Appearance Guidance for Multi-Pose Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5646-5655} }
Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yijie and Shang, Xinyi and Zhang, Yiqun and Lu, Yang and Gong, Chen and Xue, Jing-Hao and Wang, Hanzi}, title = {Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10173-10182} }
Instant Adversarial Purification with Adversarial Consistency Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Chun Tong and Yam, Hon Ming and Guo, Zhongliang and Qian, Yifei and Lau, Chun Pong}, title = {Instant Adversarial Purification with Adversarial Consistency Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24331-24340} }
Learning Textual Prompts for Open-World Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yuxin and Cui, Junbiao and Liang, Jiye}, title = {Learning Textual Prompts for Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14756-14765} }
Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Buchner_2025_CVPR, author = {B\"uchner, Tim and Anders, Christoph and Guntinas-Lichius, Orlando and Denzler, Joachim}, title = {Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {215-227} }
LongDiff: Training-Free Long Video Generation in One Go-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhuoling and Rahmani, Hossein and Ke, Qiuhong and Liu, Jun}, title = {LongDiff: Training-Free Long Video Generation in One Go}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17789-17798} }
Feature Selection for Latent Factor Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kansabanik_2025_CVPR, author = {Kansabanik, Rittwika and Barbu, Adrian}, title = {Feature Selection for Latent Factor Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30742-30751} }
Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Yoonjeon and Ryu, Soohyun and Jung, Yeonsung and Lee, Hyunkoo and Kim, Joowon and Yang, June Yong and Hwang, Jaeryong and Yang, Eunho}, title = {Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23474-23483} }
Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yongkang and Cheng, Tianheng and Feng, Bin and Liu, Wenyu and Wang, Xinggang}, title = {Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14998-15008} }
MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhiyuan and Li, Xiaofan and Xu, Zhihao and Peng, Wenjie and Zhou, Zijian and Shi, Miaojing and Huang, Shuangping}, title = {MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12089-12099} }
Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Fengfan and Yin, Bangjie and Ling, Hefei and Zhou, Qianyu and Wang, Wenxuan}, title = {Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3516-3527} }
Adapting to Observation Length of Trajectory Prediction via Contrastive Learning-
[pdf]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Ruiqi and Gong, Jun and Zhang, Xinyu and Luo, Siqi and Zhang, Bowen and Cen, Yi}, title = {Adapting to Observation Length of Trajectory Prediction via Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1645-1654} }
Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Jiho and Lee, Seonho and Lee, Minhyun and Lee, Seungho and Shim, Hyunjung}, title = {Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9782-9793} }
NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Dar-Yen and Bandyopadhyay, Hmrishav and Zou, Kai and Song, Yi-Zhe}, title = {NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7654-7663} }
ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bu_2025_CVPR, author = {Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi}, title = {ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12999-13008} }
CMMLoc: Advancing Text-to-PointCloud Localization with Cauchy-Mixture-Model Based Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yanlong and Qu, Haoxuan and Liu, Jun and Zhang, Wenxiao and Yang, Xun},
  title     = {{CMMLoc}: Advancing Text-to-{PointCloud} Localization with {Cauchy}-Mixture-Model Based Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6637--6647},
}
Masked Point-Entity Contrast for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yan and Jia, Baoxiong and Zhu, Ziyu and Huang, Siyuan},
  title     = {Masked Point-Entity Contrast for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14125--14136},
}
Decoupling Training-Free Guided Diffusion by ADMM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Youyuan and Liu, Zehua and Li, Zenan and Li, Zhaoyu and Clark, James J. and Si, Xujie},
  title     = {Decoupling Training-Free Guided Diffusion by {ADMM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23292--23302},
}
On the Generalization of Handwritten Text Recognition Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garrido-Munoz_2025_CVPR,
  author    = {Garrido-Munoz, Carlos and Calvo-Zaragoza, Jorge},
  title     = {On the Generalization of Handwritten Text Recognition Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15275--15286},
}
SwiftEdit: Lightning Fast Text-Guided Image Editing via One-Step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Tung and Nguyen, Quang and Nguyen, Khoi and Tran, Anh and Pham, Cuong},
  title     = {{SwiftEdit}: Lightning Fast Text-Guided Image Editing via One-Step Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21492--21501},
}
Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Keqi and Srivastav, Vinkle and Mutter, Didier and Padoy, Nicolas},
  title     = {Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24419--24428},
}
RC-AutoCalib: An End-to-End Radar-Camera Automatic Calibration Network-
[pdf]
[supp]
[bibtex]@InProceedings{Luu_2025_CVPR,
  author    = {Luu, Van-Tin and Cai, Yon-Lin and Tran, Vu-Hoang and Chiu, Wei-Chen and Chen, Yi-Ting and Huang, Ching-Chun},
  title     = {{RC-AutoCalib}: An End-to-End Radar-Camera Automatic Calibration Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6700--6709},
}
Argus: A Compact and Versatile Foundation Model for Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Weiming and Chen, Chen and Li, Zhizhong and Sajadmanesh, Sina and Li, Jingtao and Huang, Jiabo and Sehwag, Vikash and Sharma, Vivek and Shinozaki, Hirotaka and Garcia, Felan Carlo and Zhan, Yihao and Adachi, Naohiro and Eki, Ryoji and Spranger, Michael and Stone, Peter and Lyu, Lingjuan},
  title     = {Argus: A Compact and Versatile Foundation Model for Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4418--4429},
}
CLIP-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaokun and Huang, Yaping and Guan, Qingji},
  title     = {{CLIP}-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30312--30321},
}
InsTaG: Learning Personalized 3D Talking Head from Few-Second Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahe and Zhang, Jiawei and Bai, Xiao and Zheng, Jin and Zhou, Jun and Gu, Lin},
  title     = {{InsTaG}: Learning Personalized {3D} Talking Head from Few-Second Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10690--10700},
}
Sampling Innovation-Based Adaptive Compressive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhifu and Hu, Tao and Niu, Chaoyang and Wu, Di and Wang, Shu},
  title     = {Sampling Innovation-Based Adaptive Compressive Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2387--2397},
}
A Simple Data Augmentation for Feature Distribution Skewed Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunlu and Fu, Huazhu and Li, Yuexiang and Xie, Jinheng and Ma, Jun and Yang, Guang and Zhu, Lei},
  title     = {A Simple Data Augmentation for Feature Distribution Skewed Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25749--25758},
}
MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Wenyi and Cheng, Yean and Yang, Zhuoyi and Wang, Weihan and Wang, Lefan and Gu, Xiaotao and Huang, Shiyu and Dong, Yuxiao and Tang, Jie},
  title     = {{MotionBench}: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8450--8460},
}
Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Fan and Wu, Wei and Zheng, Kecheng and Ma, Shuailei and Gong, Biao and Liu, Jiawei and Zhai, Wei and Cao, Yang and Shen, Yujun and Zha, Zheng-Jun},
  title     = {Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19618--19627},
}
ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Yifan and Zhao, Yiming and Tang, Zhicong and Yin, Ruihong and Ye, Haoxing and Yuan, Yuhui and Chen, Dong and Bao, Jianmin and Zhang, Sirui and Wang, Yanbin and Liang, Lin and Wang, Lijuan and Li, Ji and Li, Xiu and Lian, Zhouhui and Huang, Gao and Guo, Baining},
  title     = {{ART}: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7952--7962},
}
Rotation-Equivariant Self-Supervised Method in Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hanze and Fu, Jiahong and Xie, Qi and Meng, Deyu},
  title     = {Rotation-Equivariant Self-Supervised Method in Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12720--12730},
}
ArcPro: Architectural Programs for Structured 3D Abstraction of Sparse Points-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Qirui and Zhang, Runze and Liu, Kangjun and Gong, Minglun and Zhang, Hao and Huang, Hui},
  title     = {{ArcPro}: Architectural Programs for Structured {3D} Abstraction of Sparse Points},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6563--6572},
}
GLane3D: Detecting Lanes with Graph of 3D Keypoints-
[pdf]
[supp]
[bibtex]@InProceedings{Ozturk_2025_CVPR,
  author    = {{\"O}zt{\"u}rk, Halil {\.I}brahim and Kalfao{\u{g}}lu, Muhammet Esat and Kilinc, Ozsel},
  title     = {{GLane3D}: Detecting Lanes with Graph of {3D} Keypoints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27508--27518},
}
Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Ningyuan and Fu, Minghao and Wu, Jianxin},
  title     = {Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25208--25217},
}
Hardware-Rasterized Ray-Based Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Bulo_2025_CVPR,
  author    = {Bul{\`o}, Samuel Rota and Bartolovic, Nemanja and Porzi, Lorenzo and Kontschieder, Peter},
  title     = {Hardware-Rasterized Ray-Based {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {485--494},
}
FlashSloth : Lightning Multimodal Large Language Models via Embedded Visual Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_CVPR,
  author    = {Tong, Bo and Lai, Bokai and Zhou, Yiyi and Luo, Gen and Shen, Yunhang and Li, Ke and Sun, Xiaoshuai and Ji, Rongrong},
  title     = {{FlashSloth} : Lightning Multimodal Large Language Models via Embedded Visual Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14570--14581},
}
FreqDebias: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing-
[pdf]
[supp]
[bibtex]@InProceedings{Kashiani_2025_CVPR,
  author    = {Kashiani, Hossein and Talemi, Niloufar Alipour and Afghah, Fatemeh},
  title     = {{FreqDebias}: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8775--8785},
}
Multi-subject Open-set Personalization in Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Fang, Yuwei and Lee, Kwot Sin and Skorokhodov, Ivan and Aberman, Kfir and Zhu, Jun-Yan and Yang, Ming-Hsuan and Tulyakov, Sergey},
  title     = {Multi-subject Open-set Personalization in Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6099--6110},
}
Wav2Sem: Plug-and-Play Audio Semantic Decoupling for 3D Speech-Driven Facial Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Dai, Ju and Zhao, Xin and Zhou, Feng and Pan, Junjun and Li, Lei},
  title     = {{Wav2Sem}: Plug-and-Play Audio Semantic Decoupling for {3D} Speech-Driven Facial Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {183--192},
}
Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Li-Jun and Chen, Zhen-Duo and Wang, Yongxin and Luo, Xin and Xu, Xin-Shun},
  title     = {Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25657--25666},
}
4DTAM: Non-Rigid Tracking and Mapping via Dynamic Surface Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuki_2025_CVPR,
  author    = {Matsuki, Hidenobu and Bae, Gwangbin and Davison, Andrew J.},
  title     = {{4DTAM}: Non-Rigid Tracking and Mapping via Dynamic Surface {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26921--26932},
}
T2SG: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Changsheng and Qi, Mengshi and Liu, Liang and Ma, Huadong},
  title     = {{T2SG}: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17197--17206},
}
Unseen Visual Anomaly Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Han and Cao, Yunkang and Dong, Hao and Fink, Olga},
  title     = {Unseen Visual Anomaly Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25508--25517},
}
T2ICount: Enhancing Cross-modal Understanding for Zero-Shot Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Yifei and Guo, Zhongliang and Deng, Bowen and Lei, Chun Tong and Zhao, Shuai and Lau, Chun Pong and Hong, Xiaopeng and Pound, Michael P.},
  title     = {{T2ICount}: Enhancing Cross-modal Understanding for Zero-Shot Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25336--25345},
}
RealEdit: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sushko_2025_CVPR,
  author    = {Sushko, Peter and Bharadwaj, Ayana and Lim, Zhi Yang and Ilin, Vasily and Caffee, Ben and Chen, Dongping and Salehi, Mohammadreza and Hsieh, Cheng-Yu and Krishna, Ranjay},
  title     = {{RealEdit}: {Reddit} Edits As a Large-scale Empirical Dataset for Image Transformations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13403--13413},
}
VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hanyang and Liu, Fangfu and Chi, Jiawei and Duan, Yueqi},
  title     = {{VideoScene}: Distilling Video Diffusion Model to Generate {3D} Scenes in One Step},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16475--16485},
}
3D-HGS: 3D Half-Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Haolin and Liu, Jinyang and Sznaier, Mario and Camps, Octavia},
  title     = {{3D-HGS}: {3D} Half-{Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10996--11005},
}
FG^2: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Zimin and Alahi, Alexandre},
  title     = {{FG$^2$}: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6362--6372},
}
ReNeg: Learning Negative Embedding with Reward Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaomin and Liu, Yixuan and Isobe, Takashi and Jia, Xu and Cui, Qinpeng and Zhou, Dong and Li, Dong and He, You and Lu, Huchuan and Wang, Zhongdao and Barsoum, Emad},
  title     = {{ReNeg}: Learning Negative Embedding with Reward Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23636--23645},
}
Scale Efficient Training for Large Datasets-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Qing and Gao, Junyu and Wang, Qi},
  title     = {Scale Efficient Training for Large Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20458--20467},
}
Distilled Prompt Learning for Incomplete Multimodal Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yingxue and Zhou, Fengtao and Zhao, Chenyu and Wang, Yihui and Yang, Can and Chen, Hao},
  title     = {Distilled Prompt Learning for Incomplete Multimodal Survival Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5102--5111},
}
Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal-
[pdf]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR,
  author    = {An, Haonan and Hua, Guang and Fang, Zhengru and Xu, Guowen and Rahardja, Susanto and Fang, Yuguang},
  title     = {Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13424--13433},
}
MotionPro: A Precise Motion Controller for Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhongwei and Long, Fuchen and Qiu, Zhaofan and Pan, Yingwei and Liu, Wu and Yao, Ting and Mei, Tao},
  title     = {{MotionPro}: A Precise Motion Controller for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27957--27967},
}
Goku: Flow Based Video Generative Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Shoufa and Ge, Chongjian and Zhang, Yuqi and Zhang, Yida and Zhu, Fengda and Yang, Hao and Hao, Hongxiang and Wu, Hui and Lai, Zhichao and Hu, Yifei and Lin, Ting-Che and Zhang, Shilong and Li, Fu and Li, Chuan and Wang, Xing and Peng, Yanghua and Sun, Peize and Luo, Ping and Jiang, Yi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing},
  title     = {Goku: Flow Based Video Generative Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23516--23527},
}
Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Xiaohan and Ma, Wenchao and Zhao, Shu},
  title     = {Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4862--4873},
}
Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Xinyu and Xie, Jun and Chen, Shengzhe and Liu, Jun},
  title     = {Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14068--14077},
}
Hyperbolic Safety-Aware Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poppi_2025_CVPR,
  author    = {Poppi, Tobia and Kasarla, Tejaswi and Mettes, Pascal and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Hyperbolic Safety-Aware Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4222--4232},
}
WISH: Weakly Supervised Instance Segmentation using Heterogeneous Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Kweon_2025_CVPR,
  author    = {Kweon, Hyeokjun and Yoon, Kuk-Jin},
  title     = {{WISH}: Weakly Supervised Instance Segmentation using Heterogeneous Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25377--25387},
}
SinGS: Animatable Single-Image Human Gaussian Splats with Kinematic Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yufan and Chen, Xuanhong and Li, Wen and Jia, Shunran and Wei, Hualiang and Feng, Kairui and Chen, Jialiang and Li, Yuhan and He, Ang and Zhang, Weimin and Ni, Bingbing and Zhang, Wenjun},
  title     = {{SinGS}: Animatable Single-Image Human {Gaussian} Splats with Kinematic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5571--5580},
}
Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Huang, Yu and Wang, Yaoming and Shen, Wei},
  title     = {Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15009--15020},
}
Relative Pose Estimation through Affine Corrections of Monocular Depth Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Yifan and Liu, Shaohui and Pautrat, R{\'e}mi and Pollefeys, Marc and Larsson, Viktor},
  title     = {Relative Pose Estimation through Affine Corrections of Monocular Depth Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16706--16716},
}
Zero-1-to-A: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zhenglin and Ma, Fan and Fan, Hehe and Chua, Tat-Seng},
  title     = {{Zero-1-to-A}: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15941--15952},
}
Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World 3D Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Khanh and Hassan, Ghulam Mubashar and Mian, Ajmal},
  title     = {Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World {3D} Object Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16965--16975},
}
Conical Visual Concentration for Efficient Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Long and Huang, Qidong and Dong, Xiaoyi and Lu, Jiajie and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi and Wu, Feng and Lin, Dahua},
  title     = {Conical Visual Concentration for Efficient Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14593--14603},
}
Good, Cheap, and Fast: Overfitted Image Compression with Wasserstein Distortion-
[pdf]
[supp]
[bibtex]@InProceedings{Balle_2025_CVPR,
  author    = {Ball{\'e}, Jona and Versari, Luca and Dupont, Emilien and Kim, Hyunjik and Bauer, Matthias},
  title     = {Good, Cheap, and Fast: Overfitted Image Compression with {Wasserstein} Distortion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23259--23268},
}
Period-LLM: Extending the Periodic Capability of Multimodal Large Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuting and Lu, Hao and Hu, Qingyong and Wang, Yin and Yuan, Kaishen and Liu, Xin and Wu, Kaishun},
  title     = {{Period-LLM}: Extending the Periodic Capability of Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29237--29247},
}
V2X-R: Cooperative LiDAR-4D Radar Fusion with Denoising Diffusion for 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Xun and Wang, Jinlong and Xia, Qiming and Chen, Siheng and Yang, Bisheng and Li, Xin and Wang, Cheng and Wen, Chenglu},
  title     = {{V2X-R}: Cooperative {LiDAR-4D} Radar Fusion with Denoising Diffusion for {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27390--27400},
}
Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation-
[pdf]
[bibtex]@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Weichen and Wu, Hexing and Weng, Xiaoyang and Zheng, Yuxin and Ming, Yuhang and Kong, Wanzeng},
  title     = {Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2173--2182},
}
TAROT: Towards Essentially Domain-Invariant Robustness with Theoretical Justification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Dongyoon and Lee, Jihu and Kim, Yongdai},
  title     = {{TAROT}: Towards Essentially Domain-Invariant Robustness with Theoretical Justification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25780--25789},
}
Foundations of the Theory of Performance-Based Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Pierard_2025_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Halin, Ana{\"\i}s and Cioppa, Anthony and Deliege, Adrien and Van Droogenbroeck, Marc},
  title     = {Foundations of the Theory of Performance-Based Ranking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14293--14302},
}
Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jiangyong and Jia, Baoxiong and Wang, Yan and Zhu, Ziyu and Linghu, Xiongkun and Li, Qing and Zhu, Song-Chun and Huang, Siyuan},
  title     = {Unveiling the Mist over {3D} Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24570--24581},
}
Generating Multimodal Driving Scenes via Next-Scene Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yanhao and Zhang, Haoyang and Lin, Tianwei and Huang, Lichao and Luo, Shujie and Wu, Rui and Qiu, Congpei and Ke, Wei and Zhang, Tong},
  title     = {Generating Multimodal Driving Scenes via Next-Scene Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6844--6853},
}
BIGS: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{On_2025_CVPR,
  author    = {On, Jeongwan and Gwak, Kyeonghwan and Kang, Gunyoung and Cha, Junuk and Hwang, Soohyun and Hwang, Hyein and Baek, Seungryul},
  title     = {{BIGS}: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17437--17447},
}
APT: Adaptive Personalized Training for Diffusion Models with Limited Data-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2025_CVPR,
  author    = {Chae, JungWoo and Kim, Jiyoon and Choi, JaeWoong and Kim, Kyungyul and Hwang, Sangheum},
  title     = {{APT}: Adaptive Personalized Training for Diffusion Models with Limited Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28619--28628},
}
Symmetry Strikes Back: From Single-Image Symmetry Detection to 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiang and Huang, Zixuan and Thai, Anh and Rehg, James M.},
  title     = {Symmetry Strikes Back: From Single-Image Symmetry Detection to {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {743--752},
}
Frequency-Biased Synergistic Design for Image Compression and Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiaming and Zheng, Qi and Liu, Zihao and Zhong, Yilian and Liu, Peiye and Liu, Tao and Xu, Shusong and Lu, Yanheng and Li, Sicheng and Niu, Dimin and Fan, Yibo},
  title     = {Frequency-Biased Synergistic Design for Image Compression and Compensation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12820--12829},
}
PosterMaker: Towards High-Quality Product Poster Generation with Accurate Text Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yifan and Lin, Zihang and Liu, Chuanbin and Zhou, Min and Ge, Tiezheng and Zheng, Bo and Xie, Hongtao},
  title     = {{PosterMaker}: Towards High-Quality Product Poster Generation with Accurate Text Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8083--8093},
}
Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Cheng and Choe, Jaesung and Loop, Charles and Ma, Wei-Chiu and Wang, Yu-Chiang Frank},
  title     = {Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16187--16196},
}
Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Haobin and He, Shuai and Ming, Anlong and Ma, Huadong},
  title     = {Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2935--2944},
}
You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Baorui and Gao, Huachen and Deng, Haoge and Luo, Zhengxiong and Huang, Tiejun and Tang, Lulu and Wang, Xinlong},
  title     = {You See it, You Got it: Learning {3D} Creation on Pose-Free Videos at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2016--2029},
}
MambaIC: State Space Models for High-Performance Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Fanhu and Tang, Hao and Shao, Yihua and Chen, Siyu and Shao, Ling and Wang, Yan},
  title     = {{MambaIC}: State Space Models for High-Performance Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18041--18050},
}
SCAP: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenyu and Xu, Kunlun and Liu, Zichen and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{SCAP}: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30032--30041},
}
Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Jinbo and Peng, Rui and Wang, Zhiyan and Tang, Luyang and Yang, Jiayu and Liang, Jie and Wu, Jiahao and Wang, Ronggang}, title = {Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16520-16531} }
Locality-Aware Zero-Shot Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sanghyun and Jung, Deunsol and Cho, Minsu}, title = {Locality-Aware Zero-Shot Human-Object Interaction Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20190-20200} }
PEACE: Empowering Geologic Map Holistic Understanding with MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yangyu and Gao, Tianyi and Xu, Haoran and Zhao, Qihao and Song, Yang and Gui, Zhipeng and Lv, Tengchao and Chen, Hao and Cui, Lei and Li, Scarlett and Wei, Furu}, title = {PEACE: Empowering Geologic Map Holistic Understanding with MLLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3899-3908} }
Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Zihang and Vedaldi, Andrea}, title = {Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22809-22819} }
ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zirun and Jin, Tao}, title = {ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2945-2954} }
Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Yu and Ju, Yuanchen and Wei, Tianming and Chu, Chi and Wong, Lawson L. S. and Xu, Huazhe}, title = {Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17383-17393} }
SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Xiyue and Hu, Jiarui and Hu, Junjie and Bao, Hujun and Zhang, Guofeng}, title = {SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11929-11938} }
MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation-
[pdf]
[supp]
[bibtex]@InProceedings{Sinha_2025_CVPR, author = {Sinha, Sankalp and Khan, Mohammad Sadil and Usama, Muhammad and Sam, Shino and Stricker, Didier and Ali, Sk Aziz and Afzal, Muhammad Zeshan}, title = {MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8105-8116} }
Random Conditioning for Diffusion Model Compression with Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Dohyun and Park, Sehwan and Han, Geonhee and Kim, Seung Wook and Seo, Paul Hongsuck}, title = {Random Conditioning for Diffusion Model Compression with Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18607-18618} }
Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Dong, Weisheng and Wu, Fangfang and Wang, Yaonan and Mian, Ajmal}, title = {Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11104-11114} }
ERUPT: Efficient Rendering with Unposed Patch Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shugaev_2025_CVPR, author = {Shugaev, Maxim V. and Chen, Vincent and Karrenbach, Maxim and Ashley, Kyle and Kennedy, Bridget and Cuntoor, Naresh P.}, title = {ERUPT: Efficient Rendering with Unposed Patch Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6057-6067} }
Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Runsong and Qiu, Shi and Liu, Zhengzhe and Hui, Ka-Hei and Wu, Qianyi and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3656-3665} }
Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hang and Xie, Yin and Peng, Xiaoxiu and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming}, title = {Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5709-5719} }
DocVLM: Make Your VLM an Efficient Reader-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nacson_2025_CVPR, author = {Nacson, Mor Shpigel and Aberdam, Aviad and Ganz, Roy and Ben Avraham, Elad and Golts, Alona and Kittenplon, Yair and Mazor, Shai and Litman, Ron}, title = {DocVLM: Make Your VLM an Efficient Reader}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29005-29015} }
Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Ronghang and Hu, Mengxuan and Zhuang, Weiming and Lyu, Lingjuan and Yu, Xiang and Li, Sheng}, title = {Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25688-25697} }
Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chengxiang and Wei, Yake and Yang, Zequn and Hu, Di}, title = {Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25854-25863} }
Heterogeneous Skeleton-Based Action Representation Learning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hongsong and Ma, Xiaoyan and Kuang, Jidong and Gui, Jie}, title = {Heterogeneous Skeleton-Based Action Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19154-19164} }
FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shangzhan and Wang, Jianyuan and Xu, Yinghao and Xue, Nan and Rupprecht, Christian and Zhou, Xiaowei and Shen, Yujun and Wetzstein, Gordon}, title = {FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21936-21947} }
Improving Gaussian Splatting with Localized Points Management-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Haosen and Zhang, Chenhao and Wang, Wenqing and Volino, Marco and Hilton, Adrian and Zhang, Li and Zhu, Xiatian}, title = {Improving Gaussian Splatting with Localized Points Management}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21696-21705} }
GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Dongyue and Kong, Lingdong and Huang, Tianxin and Lee, Gim Hee}, title = {GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1680-1690} }
Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Chen and Yang, Liying and Li, Peike and Wang, Dadong and Li, Lincheng and Yu, Xin}, title = {Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3131-3141} }
AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Cin_2025_CVPR, author = {Porfiri Dal Cin, Andrea and Dikov, Georgi and Ju, Jihong and Ghafoorian, Mohsen}, title = {AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16674-16684} }
ESC: Erasing Space Concept for Knowledge Deletion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Tae-Young and Park, Sundong and Jeon, Minwoo and Hwang, Hyoseok and Park, Gyeong-Moon}, title = {ESC: Erasing Space Concept for Knowledge Deletion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5010-5019} }
Language Guided Concept Bottleneck Models for Interpretable Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Lu and Han, Haoyu and Tao, Zhe and Yao, Hantao and Xu, Changsheng}, title = {Language Guided Concept Bottleneck Models for Interpretable Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14976-14986} }
One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Senmao and Wang, Lei and Wang, Kai and Liu, Tao and Xie, Jiehang and van de Weijer, Joost and Khan, Fahad Shahbaz and Yang, Shiqi and Wang, Yaxing and Yang, Jian}, title = {One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23563-23574} }
Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Wenxin and Tang, Song and Liu, Xiaofeng and Yi, Xiaojing and Ye, Mao and Zu, Chunxiao and Li, Jiahao and Zhu, Xiatian}, title = {Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28337-28346} }
Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Kairong and Yu, Chengting and Zhang, Tianqing and Zhao, Xiaochen and Yang, Shu and Wang, Hongwei and Zhang, Qiang and Xu, Qi}, title = {Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8806-8816} }
LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Jian and Huang, Wenke and Wan, Guancheng and Yang, Qu and Ye, Mang}, title = {LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26170-26180} }
SEAL: Semantic Attention Learning for Long Video Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lan and Chen, Yujia and Tran, Du and Boddeti, Vishnu Naresh and Chu, Wen-Sheng}, title = {SEAL: Semantic Attention Learning for Long Video Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26192-26201} }
Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model-
[pdf]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yingying and Yang, Quanwei and Wang, Kaisiyuan and Zhou, Hang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wu, Yu and Wang, Jingdong}, title = {Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17550-17560} }
Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Song and Yu, Yi and Yang, Wenhan and Ding, Meiwen and Chen, Zhuo and Duan, Ling-Yu and Kot, Alex C. and Jiang, Xudong}, title = {Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8753-8763} }
Odd-One-Out: Anomaly Detection by Comparing with Neighbors-
[pdf]
[supp]
[bibtex]@InProceedings{Bhunia_2025_CVPR, author = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan}, title = {Odd-One-Out: Anomaly Detection by Comparing with Neighbors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20395-20404} }
SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qingyuan and Song, Rui and Li, Jiaojiao and Cheng, Kerui and Ferstl, David and Hu, Yinlin}, title = {SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22045-22054} }
D^3CTTA: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jichun and Jiang, Haiyong and Song, Haoxuan and Xiao, Jun and Gong, Dong}, title = {{D$^3$CTTA}: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11864-11874} }
Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Qihao and Yin, Xi and Yuille, Alan and Brown, Andrew and Singh, Mannat}, title = {Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2755-2765} }
FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2025_CVPR, author = {Bandyopadhyay, Hmrishav and Song, Yi-Zhe}, title = {FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28394-28404} }
Interpretable Generative Models through Post-hoc Concept Bottlenecks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2025_CVPR, author = {Kulkarni, Akshay and Yan, Ge and Sun, Chung-En and Oikarinen, Tuomas and Weng, Tsui-Wei}, title = {Interpretable Generative Models through Post-hoc Concept Bottlenecks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8162-8171} }
SketchAgent: Language-Driven Sequential Sketch Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vinker_2025_CVPR, author = {Vinker, Yael and Shaham, Tamar Rott and Zheng, Kristine and Zhao, Alex and Fan, Judith E. and Torralba, Antonio}, title = {SketchAgent: Language-Driven Sequential Sketch Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23355-23368} }
DRAWER: Digital Reconstruction and Articulation With Environment Realism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Hongchi and Su, Entong and Memmel, Marius and Jain, Arhan and Yu, Raymond and Mbiziwo-Tiapo, Numfor and Farhadi, Ali and Gupta, Abhishek and Wang, Shenlong and Ma, Wei-Chiu}, title = {DRAWER: Digital Reconstruction and Articulation With Environment Realism}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21771-21782} }
GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, You and Fang, Li and Zhu, Hao and Hu, Fei and Ye, Long and Ma, Zhan}, title = {GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21349-21359} }
Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yante and Qi, Hanwen and Chen, Haoyu and Liang, Xinlian and Zhao, Guoying}, title = {Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27346-27356} }
A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Kai and Shi, Mingjia and Zhou, Yukun and Li, Zekai and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Zhang, Hanwang and You, Yang}, title = {A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12934-12944} }
Empowering LLMs to Understand and Generate Complex Vector Graphics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Ximing and Hu, Juncheng and Liang, Guotao and Zhang, Jing and Xu, Dong and Yu, Qian}, title = {Empowering LLMs to Understand and Generate Complex Vector Graphics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19487-19497} }
PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2025_CVPR, author = {Zhai, Hongjia and Li, Hai and Li, Zhenzhe and Pan, Xiaokun and He, Yijia and Zhang, Guofeng}, title = {PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14114-14124} }
Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Gaozhi and Cao, Silu and Qian, Zhenxing and Zhang, Xinpeng and Li, Sheng and Peng, Wanli}, title = {Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8225-8234} }
ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Ji Woo and Ton, Tri and Pham, Trung X. and Koo, Gwanhyeong and Yoon, Sunjae and Yoo, Chang D.}, title = {ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28284-28294} }
MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Kriz_2025_CVPR, author = {Kriz, Reno and Sanders, Kate and Etter, David and Murray, Kenton and Carpenter, Cameron and Recknor, Hannah and Guallar-Blasco, Jimena and Martin, Alexander and Yang, Eugene and Van Durme, Benjamin}, title = {MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24149-24158} }
VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Dabing and Gao, Zheng}, title = {VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28091-28101} }
Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Lexin and Xu, Yunyang and Ma, Xiang and Li, Xuemei and Zhang, Caiming}, title = {Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10425-10434} }
BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Yuyang and Xiao, Shishi and Wu, Keming and Liao, Qisheng and Chen, Bohan and Lin, Kevin and Huang, Danqing and Li, Ji and Yuan, Yuhui}, title = {BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23615-23624} }
MLVU: Benchmarking Multi-task Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Shu, Yan and Zhao, Bo and Wu, Boya and Liang, Zhengyang and Xiao, Shitao and Qin, Minghao and Yang, Xi and Xiong, Yongping and Zhang, Bo and Huang, Tiejun and Liu, Zheng}, title = {MLVU: Benchmarking Multi-task Long Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13691-13701} }
Recovering Dynamic 3D Sketches from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jaeah and Choi, Changwoon and Kim, Young Min and Park, Jaesik}, title = {Recovering Dynamic 3D Sketches from Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12423-12432} }
IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yuyang and Chen, Yabo and Ding, Li and Zhang, Xiaopeng and Dai, Wenrui and Zou, Junni and Xiong, Hongkai and Tian, Qi}, title = {IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7265-7275} }
EigenGS Representation: From Eigenspace to Gaussian Image Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tai_2025_CVPR, author = {Tai, Lo-Wei and Li, Ching-En and Chen, Cheng-Lin and Tsai, Chih-Jung and Chen, Hwann-Tzong and Liu, Tyng-Luh}, title = {EigenGS Representation: From Eigenspace to Gaussian Image Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13487-13496} }
Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yue and Bin, Mingyue and Zhang, Yuyang and Wang, Zhongyuan and Han, Zhen and Liang, Chao}, title = {Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4916-4926} }
SmartCLIP: Modular Vision-language Alignment with Identification Guarantees-
[pdf]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Shaoan and Kong, Lingjing and Zheng, Yujia and Yao, Yu and Tang, Zeyu and Xing, Eric P. and Chen, Guangyi and Zhang, Kun}, title = {SmartCLIP: Modular Vision-language Alignment with Identification Guarantees}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29780-29790} }
UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Xin and Su, Haisheng and Liu, Kai and Ma, Cong and Wu, Wei and Hui, Fei and Yan, Junchi}, title = {UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1407-1417} }
MaSS13K: A Matting-level Semantic Segmentation Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Chenxi and Li, Minghan and Zeng, Hui and Luo, Jun and Zhang, Lei}, title = {MaSS13K: A Matting-level Semantic Segmentation Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14046-14056} }
Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Keyizhi and Zhang, Chi and Chen, Zhan and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao}, title = {Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10265-10274} }
Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Wei and Chen, Yufei and Yue, Xiaodong}, title = {Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15508-15517} }
Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Lei and Meng, Yuan and Tang, Chen and Ma, Xinzhu and Jiang, Jingyan and Wang, Xin and Wang, Zhi and Zhu, Wenwu}, title = {Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28306-28315} }
ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Heng and Ren, Yuqiang and Yan, Ke and Ding, Shouhong and Hao, Yongtao}, title = {ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14358-14368} }
RoboGround: Robotic Manipulation with Grounded Vision-Language Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Haifeng and Chen, Xinyi and Chen, Yilun and Li, Hao and Han, Xiaoshen and Wang, Zehan and Wang, Tai and Pang, Jiangmiao and Zhao, Zhou}, title = {RoboGround: Robotic Manipulation with Grounded Vision-Language Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22540-22550} }
VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Dohun and Kim, Bryan Sangwoo and Park, Geon Yeong and Ye, Jong Chul}, title = {VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2599-2608} }
Improving Transferable Targeted Attacks with Feature Tuning Mixup-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Kaisheng and Dai, Xuelong and Li, Yanjie and Wang, Dong and Xiao, Bin}, title = {Improving Transferable Targeted Attacks with Feature Tuning Mixup}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25802-25811} }
OmniStereo: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Jiaxi and Wang, Yushen and Meng, Haitao and Hou, Zuoxun and Chang, Yi and Chen, Gang}, title = {OmniStereo: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1003-1012} }
DroneSplat: 3D Gaussian Splatting for Robust 3D Reconstruction from In-the-Wild Drone Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Jiadong and Gao, Yu and Yang, Dianyi and Yan, Liqi and Yue, Yufeng and Yang, Yi}, title = {{DroneSplat}: {3D} {Gaussian} Splatting for Robust {3D} Reconstruction from In-the-Wild Drone Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {833--843} }
SDGOCC: Semantic and Depth-Guided Bird's-Eye View Transformation for 3D Multimodal Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, ZaiPeng and Dang, ChenXu and Hu, Xuzhong and An, Pei and Ding, Junfeng and Zhan, Jie and Xu, YunBiao and Ma, Jie}, title = {{SDGOCC}: Semantic and Depth-Guided Bird's-Eye View Transformation for {3D} Multimodal Occupancy Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6751--6760} }
DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Tianyi and Wu, Dongming and Han, Wencheng and Jiang, Junpeng and Zhou, Xia and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing}, title = {{DrivingSphere}: Building a High-fidelity {4D} World for Closed-loop Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27531--27541} }
nnWNet: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yanfeng and Li, Lingrui and Lu, Le and Xu, Minfeng}, title = {{nnWNet}: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20852--20862} }
Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yutong and Teng, Jiajie and Cao, Jiajiong and Li, Yuming and Ma, Chenguang and Xu, Hongteng and Luo, Dixin}, title = {Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2183--2193} }
VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saravanan_2025_CVPR, author = {Saravanan, Darshana and Gupta, Varun and Singh, Darshan and Khan, Zeeshan and Gandhi, Vineet and Tapaswi, Makarand}, title = {{VELOCITI}: Benchmarking Video-Language Compositional Reasoning with Strict Entailment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18914--18924} }
Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label 3D Scene Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Daizong and Hu, Wei}, title = {Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label {3D} Scene Attacks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11886--11897} }
IDProtector: An Adversarial Noise Encoder to Protect Against ID-Preserving Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Yiren and Yang, Pei and Ci, Hai and Shou, Mike Zheng}, title = {{IDProtector}: An Adversarial Noise Encoder to Protect Against {ID}-Preserving Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3019--3028} }
HuPerFlow: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yung-Hao and Sun, Zitang and Fukiage, Taiki and Nishida, Shin'ya}, title = {{HuPerFlow}: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22799--22808} }
LoTUS: Large-Scale Machine Unlearning with a Taste of Uncertainty-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spartalis_2025_CVPR, author = {Spartalis, Christoforos N. and Semertzidis, Theodoros and Gavves, Efstratios and Daras, Petros}, title = {{LoTUS}: Large-Scale Machine Unlearning with a Taste of Uncertainty}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10046--10055} }
SleeperMark: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zilan and Guo, Junfeng and Zhu, Jiacheng and Li, Yiming and Huang, Heng and Chen, Muhao and Tu, Zhengzhong}, title = {{SleeperMark}: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8213--8224} }
Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning (PEFT) in Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2025_CVPR, author = {Mai, Zheda and Zhang, Ping and Tu, Cheng-Hao and Chen, Hong-You and Nguyen, Quang-Huy and Zhang, Li and Chao, Wei-Lun}, title = {Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning ({PEFT}) in Visual Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14845--14857} }
Pippo: High-Resolution Multi-View Humans from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kant_2025_CVPR, author = {Kant, Yash and Weber, Ethan and Kim, Jin Kyu and Khirodkar, Rawal and Zhaoen, Su and Martinez, Julieta and Gilitschenski, Igor and Saito, Shunsuke and Bagautdinov, Timur}, title = {Pippo: High-Resolution Multi-View Humans from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16418--16429} }
H2ST: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhang and Zhao, Wenjie and Guo, Yunhui}, title = {{H2ST}: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15413--15423} }
MetaWriter: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Wenhao and Gu, Li and Suen, Chingyee Yee and Wang, Yang}, title = {{MetaWriter}: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23494--23504} }
Subnet-Aware Dynamic Supernet Training for Neural Architecture Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2025_CVPR, author = {Jeon, Jeimin and Oh, Youngmin and Lee, Junghyup and Baek, Donghyeon and Kim, Dohyung and Eom, Chanho and Ham, Bumsub}, title = {Subnet-Aware Dynamic Supernet Training for Neural Architecture Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30137--30146} }
MoVE-KD: Knowledge Distillation for VLMs with Mixture of Visual Encoders-
[pdf]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Jiajun and Zhang, Yuan and Huang, Tao and Lu, Ming and Zhang, Qizhe and An, Ruichuan and Ma, Ningning and Zhang, Shanghang}, title = {{MoVE-KD}: Knowledge Distillation for {VLMs} with Mixture of Visual Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19846--19856} }
CamFreeDiff: Camera-free Image to Panorama Generation with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Xiaoding and Tang, Shitao and Li, Kejie and Wang, Peng}, title = {{CamFreeDiff}: Camera-free Image to Panorama Generation with Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16408--16417} }
Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuxuan and Wang, Haoxuan and Ling, Pengyang and Wei, Zhixiang and Chen, Huaian and Jin, Yi and Chen, Enhong}, title = {Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16071--16080} }
EchoWorld: Learning Motion-Aware World Models for Echocardiography Probe Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Yang and Wang, Yulin and Jiang, Haojun and Liu, Pan and Song, Shiji and Huang, Gao}, title = {{EchoWorld}: Learning Motion-Aware World Models for Echocardiography Probe Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25993--26003} }
Controllable Human Image Generation with Personalized Multi-Garments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Yisol and Kwak, Sangkyung and Yu, Sihyun and Choi, Hyungwon and Shin, Jinwoo}, title = {Controllable Human Image Generation with Personalized Multi-Garments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28736--28747} }
FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs-
[pdf]
[supp]
[bibtex]@InProceedings{Asokan_2025_CVPR, author = {Asokan, Mothilal and Wu, Kebin and Albreiki, Fatima}, title = {{FineLIP}: Extending {CLIP}'s Reach via Fine-Grained Alignment with Longer Text Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14495--14504} }
Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2025_CVPR, author = {Oh, Hyejin and Kim, Woo-Shik and Lee, Sangyoon and Park, YungKyung and Kang, Je-Won}, title = {Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2215--2225} }
UHD-processer: Unified UHD Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yidi and Li, Dong and Fu, Xueyang and Lu, Xin and Huang, Jie and Zha, Zheng-Jun}, title = {{UHD-processer}: Unified {UHD} Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23121--23130} }
Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR, author = {Ge, Yuying and Li, Yizhuo and Ge, Yixiao and Shan, Ying}, title = {Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13606--13617} }
Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiacong and Lo, Shao-Yuan and Safaei, Bardia and Patel, Vishal M. and Dwivedi, Isht}, title = {Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20370--20382} }
Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zhenyu and Dong, Chengdong and Kumar, Ajay}, title = {Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6212--6221} }
Neural Hierarchical Decomposition for Single Image Plant Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhihao and Cheng, Zhanglin and Yokoya, Naoto}, title = {Neural Hierarchical Decomposition for Single Image Plant Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {733--742} }
GBC-Splat: Generalizable Gaussian-Based Clothed Human Digitalization under Sparse RGB Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Hanzhang and Liao, Zhanfeng and Zhou, Boyao and Zheng, Shunyuan and Zhou, Xilong and Zhang, Liuxin and Wang, QianYing and Liu, Yebin}, title = {{GBC-Splat}: Generalizable {Gaussian}-Based Clothed Human Digitalization under Sparse {RGB} Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26377--26387} }
AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahmani_2025_CVPR, author = {Bahmani, Sherwin and Skorokhodov, Ivan and Qian, Guocheng and Siarohin, Aliaksandr and Menapace, Willi and Tagliasacchi, Andrea and Lindell, David B. and Tulyakov, Sergey}, title = {{AC3D}: Analyzing and Improving {3D} Camera Control in Video Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22875--22889} }
A Unified Model for Compressed Sensing MRI Across Undersampling Patterns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jatyani_2025_CVPR, author = {Jatyani, Armeet Singh and Wang, Jiayun and Chandrashekar, Aditi and Wu, Zihui and Liu-Schiaffini, Miguel and Tolooshams, Bahareh and Anandkumar, Anima}, title = {A Unified Model for Compressed Sensing {MRI} Across Undersampling Patterns}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26004--26013} }
Video-Guided Foley Sound Generation with Multimodal Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ziyang and Seetharaman, Prem and Russell, Bryan and Nieto, Oriol and Bourgin, David and Owens, Andrew and Salamon, Justin}, title = {Video-Guided {Foley} Sound Generation with Multimodal Controls}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18770--18781} }
Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhaoyang and Wang, Yuan and Li, Wangkai and Zhang, Tianzhu and Liu, Xiang}, title = {Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9849--9859} }
SACB-Net: Spatial-awareness Convolutions for Medical Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Xinxing and Zhang, Tianyang and Lu, Wenqi and Meng, Qingjie and Frangi, Alejandro F. and Duan, Jinming}, title = {{SACB-Net}: Spatial-awareness Convolutions for Medical Image Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5227--5237} }
Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeeyung and Esmaeili, Erfan and Qiu, Qiang}, title = {Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8031--8040} }
DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jinyuan and Zhang, Bowei and Mei, Qingyun and Li, Xingyuan and Zou, Yang and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin}, title = {{DCEvo}: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2226--2235} }
TSD-SR: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Linwei and Fan, Qingnan and Guo, Yihong and Wang, Zhonghao and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing}, title = {{TSD-SR}: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23174--23184} }
AIpparel: A Multimodal Foundation Model for Digital Garments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nakayama_2025_CVPR, author = {Nakayama, Kiyohiro and Ackermann, Jan and Kesdogan, Timur Levent and Zheng, Yang and Korosteleva, Maria and Sorkine-Hornung, Olga and Guibas, Leonidas J. and Yang, Guandao and Wetzstein, Gordon}, title = {{AIpparel}: A Multimodal Foundation Model for Digital Garments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8138--8149} }
Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jianing and Sax, Alexander and Liang, Kevin J. and Henaff, Mikael and Tang, Hao and Cao, Ang and Chai, Joyce and Meier, Franziska and Feiszli, Matt}, title = {{Fast3R}: Towards {3D} Reconstruction of 1000+ Images in One Forward Pass}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21924--21935} }
StyleStudio: Text-Driven Style Transfer with Selective Control of Style Elements-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Mingkun and Song, Xue and Zhu, Beier and Wang, Hao and Zhang, Chi}, title = {{StyleStudio}: Text-Driven Style Transfer with Selective Control of Style Elements}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23443--23452} }
CTRL-O: Language-Controllable Object-Centric Visual Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Didolkar_2025_CVPR, author = {Didolkar, Aniket and Zadaianchuk, Andrii and Awal, Rabiul and Seitzer, Maximilian and Gavves, Efstratios and Agrawal, Aishwarya}, title = {{CTRL-O}: Language-Controllable Object-Centric Visual Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29523--29533} }
PO3AD: Predicting Point Offsets toward Better 3D Point Cloud Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Jianan and Zhao, Weiguang and Yang, Xi and Cheng, Guangliang and Huang, Kaizhu}, title = {{PO3AD}: Predicting Point Offsets toward Better {3D} Point Cloud Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1353--1362} }
Text Augmented Correlation Transformer For Few-shot Classification & Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Nandam_2025_CVPR, author = {Nandam, Srinivasa Rao and Atito, Sara and Feng, Zhenhua and Kittler, Josef and Awais, Muhammad}, title = {Text Augmented Correlation Transformer For Few-shot Classification \& Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25357--25366} }
F^3OCUS - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics-
[pdf]
[supp]
[bibtex]@InProceedings{Saha_2025_CVPR, author = {Saha, Pramit and Wagner, Felix and Mishra, Divyanshu and Peng, Can and Thakur, Anshul and Clifton, David A. and Kamnitsas, Konstantinos and Noble, J. Alison}, title = {{F$^{3}$OCUS} - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20006--20017} }
ICT: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junzhe and Zhang, Tianshu and Huang, Shiyu and Niu, Yuwei and Zhang, Linfeng and Wen, Lijie and Hu, Xuming}, title = {{ICT}: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4209--4221} }
PreciseCam: Precise Camera Control for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bernal-Berdun_2025_CVPR, author = {Bernal-Berdun, Edurne and Serrano, Ana and Masia, Belen and Gadelha, Matheus and Hold-Geoffroy, Yannick and Sun, Xin and Gutierrez, Diego}, title = {{PreciseCam}: Precise Camera Control for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2724--2733} }
3D Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oh_2025_CVPR, author = {Oh, Gyeongrok and Kim, Sungjune and Ko, Heeju and Chi, Hyung-gun and Kim, Jinkyu and Lee, Dongwook and Ji, Daehyun and Choi, Sungjoon and Jang, Sujin and Kim, Sangpil}, title = {{3D} Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17134--17144} }
Unified Dense Prediction of Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Lehan and Qi, Lu and Li, Xiangtai and Li, Sheng and Jampani, Varun and Yang, Ming-Hsuan}, title = {Unified Dense Prediction of Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28963--28973} }
Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Yuan-Hong and Mahmood, Rafid and Fidler, Sanja and Acuna, David}, title = {Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14667--14678} }
SET: Spectral Enhancement for Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Huixin and Wang, Runqi and Li, Yanjing and Yang, Linlin and Lin, Shaohui and Cao, Xianbin and Zhang, Baochang}, title = {{SET}: Spectral Enhancement for Tiny Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4713--4723} }
g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zihan and Lee, Gim Hee}, title = {{g3D-LF}: Generalizable {3D}-Language Feature Fields for Embodied Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14191--14202} }
Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yong and Zheng, Weijie and Huang, Hanxun and Ye, Guangnan and Ma, Xingjun}, title = {Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30702--30711} }
Temporal Action Detection Model Compression by Progressive Block Drop-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xiaoyong and Guo, Yong and Liang, Jiaming and Zhuang, Sitong and Zeng, Runhao and Hu, Xiping}, title = {Temporal Action Detection Model Compression by Progressive Block Drop}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29225--29236} }
Differentiable Inverse Rendering with Interpretable Basis BRDFs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2025_CVPR, author = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan}, title = {Differentiable Inverse Rendering with Interpretable Basis {BRDFs}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {475--484} }
EquiPose: Exploiting Permutation Equivariance for Relative Camera Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuzhen and Dong, Qiulei}, title = {{EquiPose}: Exploiting Permutation Equivariance for Relative Camera Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1127--1137} }
Face Forgery Video Detection via Temporal Forgery Cue Unraveling-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zonghui and Liu, Yingjie and Zhang, Jie and Zheng, Haiyong and Shan, Shiguang}, title = {Face Forgery Video Detection via Temporal Forgery Cue Unraveling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7396--7405} }
Temporally Consistent Object-Centric Learning by Contrasting Slots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Manasyan_2025_CVPR, author = {Manasyan, Anna and Seitzer, Maximilian and Radovic, Filip and Martius, Georg and Zadaianchuk, Andrii}, title = {Temporally Consistent Object-Centric Learning by Contrasting Slots}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5401--5411} }
MC^2: Multi-concept Guidance for Customized Multi-concept Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jiaxiu and Zhang, Yabo and Feng, Kailai and Wu, Xiaohe and Li, Wenbo and Pei, Renjing and Li, Fan and Zuo, Wangmeng}, title = {{MC$^{2}$}: Multi-concept Guidance for Customized Multi-concept Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2802--2812} }
UniReal: Universal Image Generation and Editing via Learning Real-world Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xi and Zhang, Zhifei and Zhang, He and Zhou, Yuqian and Kim, Soo Ye and Liu, Qing and Li, Yijun and Zhang, Jianming and Zhao, Nanxuan and Wang, Yilin and Ding, Hui and Lin, Zhe and Zhao, Hengshuang}, title = {{UniReal}: Universal Image Generation and Editing via Learning Real-world Dynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12501--12511} }
Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Dong and Zhong, Wenqi and Yu, Wei and Pan, Yingwei and Zhang, Dingwen and Yao, Ting and Han, Junwei and Mei, Tao}, title = {Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22648--22657} }
Exploring Contextual Attribute Density in Referring Expression Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhicheng and Pan, Zhiyu and Peng, Zhan and Cheng, Jian and Xiao, Liwen and Jiang, Wei and Cao, Zhiguo}, title = {Exploring Contextual Attribute Density in Referring Expression Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19587--19596} }
DINOv2 Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Jose_2025_CVPR, author = {Jose, Cijo and Moutakanni, Th{\'e}o and Kang, Dahyun and Baldassarre, Federico and Darcet, Timoth{\'e}e and Xu, Hu and Li, Daniel and Szafraniec, Marc and Ramamonjisoa, Micha{\"e}l and Oquab, Maxime and Sim{\'e}oni, Oriane and Vo, Huy V. and Labatut, Patrick and Bojanowski, Piotr}, title = {{DINOv2} Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24905--24916} }
Learning Affine Correspondences by Integrating Geometric Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Pengju and Guan, Banglei and Yu, Zhenbao and Shang, Yang and Yu, Qifeng and Barath, Daniel}, title = {Learning Affine Correspondences by Integrating Geometric Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27038--27048} }
UCOD-DPL: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Weiqi and Chen, Lvhai and Kou, Huaijia and Zhang, Shengchuan and Zhang, Yan and Cao, Liujuan}, title = {{UCOD-DPL}: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30365--30375} }
Geometry in Style: 3D Stylization via Surface Normal Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2025_CVPR, author = {Dinh, Nam Anh and Lang, Itai and Kim, Hyunwoo and Stein, Oded and Hanocka, Rana}, title = {Geometry in Style: {3D} Stylization via Surface Normal Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28456--28467} }
Multi-modal Vision Pre-training for Medical Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rui_2025_CVPR,
  author    = {Rui, Shaohao and Chen, Lingzhi and Tang, Zhenyu and Wang, Lilong and Liu, Mianxin and Zhang, Shaoting and Wang, Xiaosong},
  title     = {Multi-modal Vision Pre-training for Medical Image Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5164--5174},
}
SegMAN: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Yunxiang and Lou, Meng and Yu, Yizhou},
  title     = {{SegMAN}: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19077--19087},
}
STEP: Enhancing Video-LLMs' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Haiyi and Gao, Minghe and Qian, Long and Pan, Kaihang and Yu, Qifan and Li, Juncheng and Wang, Wenjie and Tang, Siliang and Zhuang, Yueting and Chua, Tat-Seng},
  title     = {{STEP}: Enhancing Video-{LLMs}' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3284--3294},
}
OmniFlow: Any-to-Any Generation with Multi-Modal Rectified Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Liao, Zichun and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya},
  title     = {{OmniFlow}: Any-to-Any Generation with Multi-Modal Rectified Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13178--13188},
}
PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Chenyu and Dong, Xuan and Zhu, Xizhou and Su, Weijie and Wang, Jiahao and Tian, Hao and Chen, Zhe and Wang, Wenhai and Lu, Lewei and Dai, Jifeng},
  title     = {{PVC}: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24939--24949},
}
LIM: Large Interpolator Model for Dynamic Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabathier_2025_CVPR,
  author    = {Sabathier, Remy and Mitra, Niloy J. and Novotny, David},
  title     = {{LIM}: Large Interpolator Model for Dynamic Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6154--6164},
}
Multiple Object Tracking as ID Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Ruopeng and Qi, Ji and Wang, Limin},
  title     = {Multiple Object Tracking as {ID} Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27883--27893},
}
AutoPresent: Designing Structured Visuals from Scratch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Jiaxin and Wang, Zora Zhiruo and Zhou, Xuhui and Peng, Yi-Hao and Subramanian, Sanjay and Tan, Qinyue and Sap, Maarten and Suhr, Alane and Fried, Daniel and Neubig, Graham and Darrell, Trevor},
  title     = {{AutoPresent}: Designing Structured Visuals from Scratch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2902--2911},
}
SLAM3R: Real-Time Dense Scene Reconstruction from Monocular RGB Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzheng and Dong, Siyan and Wang, Shuzhe and Yin, Yingda and Yang, Yanchao and Fan, Qingnan and Chen, Baoquan},
  title     = {{SLAM3R}: Real-Time Dense Scene Reconstruction from Monocular {RGB} Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16651--16662},
}
PIDLoc: Cross-View Pose Optimization Network Inspired by PID Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Wooju and Park, Juhye and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, DongWan and Myung, Hyun},
  title     = {{PIDLoc}: Cross-View Pose Optimization Network Inspired by {PID} Controllers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21981--21990},
}
VisionArena: 230k Real World User-VLM Conversations with Preference Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chou_2025_CVPR,
  author    = {Chou, Christopher and Dunlap, Lisa and Mashita, Koki and Mandal, Krishna and Darrell, Trevor and Stoica, Ion and Gonzalez, Joseph E. and Chiang, Wei-Lin},
  title     = {{VisionArena}: 230k Real World User-{VLM} Conversations with Preference Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3877--3887},
}
FAM Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haosen and Bulat, Adrian and Hadji, Isma and Pham, Hai X. and Zhu, Xiatian and Tzimiropoulos, Georgios and Martinez, Brais},
  title     = {{FAM Diffusion}: Frequency and Attention Modulation for High-Resolution Image Generation with {Stable Diffusion}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2459--2468},
}
DreamOmni: Unified Image Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Bin and Zhang, Yuechen and Li, Jingyao and Wang, Chengyao and Wang, Yitong and Wu, Xinglong and Yu, Bei and Jia, Jiaya},
  title     = {{DreamOmni}: Unified Image Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28533--28543},
}
Hash3D: Training-free Acceleration for 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xingyi and Liu, Songhua and Wang, Xinchao},
  title     = {{Hash3D}: Training-free Acceleration for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21481--21491},
}
SemGeoMo: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2025_CVPR,
  author    = {Cong, Peishan and Wang, Ziyi and Ma, Yuexin and Yue, Xiangyu},
  title     = {{SemGeoMo}: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17561--17570},
}
MultiGO: Towards Multi-level Geometry Learning for Monocular 3D Textured Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Gangjian and Yao, Nanjie and Zhang, Shunsi and Zhao, Hanfeng and Pang, Guoliang and Shu, Jian and Wang, Hao},
  title     = {{MultiGO}: Towards Multi-level Geometry Learning for Monocular {3D} Textured Human Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {338--347},
}
Generative Photomontage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Sean J. and Kumari, Nupur and Shamir, Ariel and Zhu, Jun-Yan},
  title     = {Generative Photomontage},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7931--7941},
}
Multi-view Reconstruction via SfM-guided Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Haoyu and Zhu, He and Peng, Sida and Lin, Haotong and Yan, Yunzhi and Xie, Tao and Wang, Wenguan and Zhou, Xiaowei and Bao, Hujun},
  title     = {Multi-view Reconstruction via {SfM}-guided Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5272--5282},
}
Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ruiyi and Zheng, Yushuo and Zhang, Zicheng and Li, Chunyi and Liu, Shuaicheng and Zhai, Guangtao and Liu, Xiaohong},
  title     = {Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23091--23100},
}
HuMoCon: Concept Discovery for Human Motion Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Qihang and Tang, Chengcheng and Tekin, Bugra and Ma, Shugao and Yang, Yanchao},
  title     = {{HuMoCon}: Concept Discovery for Human Motion Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7179--7190},
}
RUBIK: A Structured Benchmark for Image Matching across Geometric Challenges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Loiseau_2025_CVPR,
  author    = {Loiseau, Thibaut and Bourmaud, Guillaume},
  title     = {{RUBIK}: A Structured Benchmark for Image Matching across Geometric Challenges},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27070--27080},
}
Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Jiuyang and Jiang, Junjun and Jiang, Kui and Li, Jiahan and Zhang, Yongbing},
  title     = {Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30818--30828},
}
FreeScene: Mixed Graph Diffusion for 3D Scene Synthesis from Free Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Tongyuan and Bai, Wangyuanfan and Chen, Dong and Wu, Tieru and Li, Manyi and Ma, Rui},
  title     = {{FreeScene}: Mixed Graph Diffusion for {3D} Scene Synthesis from Free Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5893--5903},
}
Rethinking Correspondence-based Category-Level Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Huan and Yang, Wenfei and Zhang, Shifeng and Zhang, Tianzhu},
  title     = {Rethinking Correspondence-based Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1170--1179},
}
Curriculum Direct Preference Optimization for Diffusion and Consistency Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Croitoru_2025_CVPR,
  author    = {Croitoru, Florinel-Alin and Hondru, Vlad and Ionescu, Radu Tudor and Sebe, Nicu and Shah, Mubarak},
  title     = {Curriculum Direct Preference Optimization for Diffusion and Consistency Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2824--2834},
}
IncEventGS: Pose-Free Gaussian Splatting from a Single Event Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jian and Dong, Chengrui and Chen, Xuanhua and Liu, Peidong},
  title     = {{IncEventGS}: Pose-Free {Gaussian} Splatting from a Single Event Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26933--26942},
}
OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gutbrod_2025_CVPR,
  author    = {Gutbrod, Max and Rauber, David and Nunes, Danilo Weber and Palm, Christoph},
  title     = {{OpenMIBOOD}: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25874--25886},
}
Detecting Open World Objects via Partial Attribute Assignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Muli and Goenawan, Gabriel James and Qin, Huaiyuan and Han, Kai and Peng, Xi and Yang, Yanhua and Zhu, Hongyuan},
  title     = {Detecting Open World Objects via Partial Attribute Assignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20318--20328},
}
FactCheXcker: Mitigating Measurement Hallucinations in Chest X-ray Report Generation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Heiman_2025_CVPR,
  author    = {Heiman, Alice and Zhang, Xiaoman and Chen, Emma and Kim, Sung Eun and Rajpurkar, Pranav},
  title     = {{FactCheXcker}: Mitigating Measurement Hallucinations in Chest {X-ray} Report Generation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30787--30796},
}
Neural Inverse Rendering from Propagating Light-
[pdf]
[supp]
[bibtex]@InProceedings{Malik_2025_CVPR,
  author    = {Malik, Anagh and Attal, Benjamin and Xie, Andrew and O'Toole, Matthew and Lindell, David B.},
  title     = {Neural Inverse Rendering from Propagating Light},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10534--10544},
}
When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Huang, Qingming},
  title     = {When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24033--24044},
}
Personalized Preference Fine-tuning of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR,
  author    = {Dang, Meihua and Singh, Anikait and Zhou, Linqi and Ermon, Stefano and Song, Jiaming},
  title     = {Personalized Preference Fine-tuning of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8020--8030},
}
DecoupledGaussian: Object-Scene Decoupling for Physics-Based Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Miaowei and Zhang, Yibo and Xu, Weiwei and Ma, Rui and Zou, Changqing and Morris, Daniel},
  title     = {{DecoupledGaussian}: Object-Scene Decoupling for Physics-Based Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11361--11372},
}
UniPose: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yiheng and Hou, Ruibing and Chang, Hong and Shan, Shiguang and Chen, Xilin},
  title     = {{UniPose}: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27805--27815},
}
POMP: Physics-consistent Motion Generative Model through Phase Manifolds-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Bin and Pan, Ye and Liu, Zhimeng and Tan, Shuai and Jin, Xiaogang and Yang, Xiaokang},
  title     = {{POMP}: Physics-consistent Motion Generative Model through Phase Manifolds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22690--22701},
}
NN-Former: Rethinking Graph Structure in Neural Architecture Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ruihan and Zhang, Haokui and Wang, Yaowei and Zeng, Wei and Zhang, Shiliang},
  title     = {{NN-Former}: Rethinking Graph Structure in Neural Architecture Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10004--10014},
}
DashGaussian: Optimizing 3D Gaussian Splatting in 200 Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Youyu and Jiang, Junjun and Jiang, Kui and Tang, Xiao and Li, Zhihao and Liu, Xianming and Nie, Yinyu},
  title     = {{DashGaussian}: Optimizing {3D} {Gaussian} Splatting in 200 Seconds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11146--11155},
}
Reasoning to Attend: Try to Understand How <SEG> Token Works-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Rui and Yin, Xin and Dou, Dejing},
  title     = {Reasoning to Attend: Try to Understand How \ensuremath{<}{SEG}\ensuremath{>} Token Works},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24722--24731},
}
ReSpec: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Chris Dongjoo and Moon, Jihwan and Moon, Sangwoo and Yun, Heeseung and Lee, Sihaeng and Kembhavi, Aniruddha and Lee, Soonyoung and Kim, Gunhee and Lee, Sangho and Clark, Christopher},
  title     = {{ReSpec}: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29040--29049},
}
A Unified Image-Dense Annotation Generation Model for Underwater Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hongkai and Liang, Dingkang and Qi, Zhenghao and Bai, Xiang},
  title     = {A Unified Image-Dense Annotation Generation Model for Underwater Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {961--970},
}
PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhouib_2025_CVPR,
  author    = {Dhouib, Mohamed and Buscaldi, Davide and Vanier, Sonia and Shabou, Aymen},
  title     = {{PACT}: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14582--14592},
}
R-SCoRe: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Xudong and Wang, Fangjinhua and Galliani, Silvano and Vogel, Christoph and Pollefeys, Marc},
  title     = {{R-SCoRe}: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11536--11546},
}
DynRefer: Delving into Region-level Multimodal Tasks via Dynamic Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yuzhong and Liu, Feng and Liu, Yue and Liao, Mingxiang and Gong, Chen and Ye, Qixiang and Wan, Fang},
  title     = {{DynRefer}: Delving into Region-level Multimodal Tasks via Dynamic Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24742--24752},
}
Playing the Fool: Jailbreaking LLMs and Multimodal LLMs with Out-of-Distribution Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Joonhyun and Bae, Seyun and Jung, Yeonsung and Hwang, Jaeryong and Yang, Eunho},
  title     = {Playing the Fool: Jailbreaking {LLMs} and Multimodal {LLMs} with Out-of-Distribution Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29937--29946},
}
Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihao and Wu, Aming and Han, Yahong},
  title     = {Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14225--14234},
}
NTR-Gaussian: Nighttime Dynamic Thermal Reconstruction with 4D Gaussian Splatting Based on Thermodynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Kun and Liu, Yuxiang and Cui, Zeyu and Liu, Yu and Zhang, Maojun and Yan, Shen and Wang, Qing},
  title     = {{NTR-Gaussian}: Nighttime Dynamic Thermal Reconstruction with {4D} {Gaussian} Splatting Based on Thermodynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {691--700},
}
OmniSplat: Taming Feed-Forward 3D Gaussian Splatting for Omnidirectional Images with Editable Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Suyoung and Chung, Jaeyoung and Kim, Kihoon and Huh, Jaeyoo and Lee, Gunhee and Lee, Minsoo and Lee, Kyoung Mu},
  title     = {{OmniSplat}: Taming Feed-Forward {3D} {Gaussian} Splatting for Omnidirectional Images with Editable Capabilities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16356--16365},
}
VideoWorld: Exploring Knowledge Learning from Unlabeled Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Zhongwei and Wei, Yunchao and Guo, Xun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie},
  title     = {{VideoWorld}: Exploring Knowledge Learning from Unlabeled Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29029--29039},
}
FSHNet: Fully Sparse Hybrid Network for 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuai and Cui, Mingyue and Li, Boyang and Liang, Quanmin and Hong, Tinghe and Huang, Kai and Shan, Yunxiao and Huang, Kai},
  title     = {{FSHNet}: Fully Sparse Hybrid Network for {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8900--8909},
}
3D-SLNR: A Super Lightweight Neural Representation for Large-scale 3D Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Chenhui and Tang, Fulin and An, Ning and Wu, Yihong},
  title     = {{3D-SLNR}: A Super Lightweight Neural Representation for Large-scale {3D} Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27233--27242},
}
UniVAD: A Training-free Unified Model for Few-shot Visual Anomaly Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Zhaopeng and Zhu, Bingke and Zhu, Guibo and Chen, Yingying and Tang, Ming and Wang, Jinqiao},
  title     = {{UniVAD}: A Training-free Unified Model for Few-shot Visual Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15194--15203},
}
STINR: Deciphering Spatial Transcriptomics via Implicit Neural Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Yisi and Zhao, Xile and Ye, Kai and Meng, Deyu},
  title     = {{STINR}: Deciphering Spatial Transcriptomics via Implicit Neural Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25930--25939},
}
Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Hang and Luo, Lei and Qian, Jianjun and Yan, Mengkai and Chen, Shuo and Yang, Jian},
  title     = {Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10858--10867},
}
Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for RGBD Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Jamal_2025_CVPR,
  author    = {Jamal, Muhammad Abdullah and Mohareri, Omid},
  title     = {Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for {RGBD} Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17947--17957},
}
JTD-UAV: MLLM-Enhanced Joint Tracking and Description Framework for Anti-UAV Systems-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yifan and Zhao, Jian and Fan, Zhaoxin and Zhang, Xin and Wu, Xuecheng and Zhang, Yudian and Jin, Lei and Li, Xinyue and Wang, Gang and Jia, Mengxi and Hu, Ping and Zhu, Zheng and Li, Xuelong},
  title     = {{JTD-UAV}: {MLLM}-Enhanced Joint Tracking and Description Framework for Anti-{UAV} Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1633--1644},
}
HaWoR: World-Space Hand Motion Reconstruction from Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinglei and Deng, Jiankang and Ma, Chao and Potamias, Rolandos Alexandros},
  title     = {{HaWoR}: World-Space Hand Motion Reconstruction from Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1805--1815},
}
Font-Agent: Enhancing Font Understanding with Large Language Models-
[pdf]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yingxin and Xu, Cuijie and Shi, Haitian and Yang, Guoqing and Li, Xiaoning and Luo, Zhiming and Li, Shaozi},
  title     = {{Font-Agent}: Enhancing Font Understanding with Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19670--19680},
}
Secret Lies in Color: Enhancing AI-Generated Images Detection with Color Distribution Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Duan, Xiaoyue and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {Secret Lies in Color: Enhancing {AI}-Generated Images Detection with Color Distribution Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13445--13454},
}
RADIOv2.5: Improved Baselines for Agglomerative Vision Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Heinrich_2025_CVPR,
  author    = {Heinrich, Greg and Ranzinger, Mike and Yin, Hongxu and Lu, Yao and Kautz, Jan and Tao, Andrew and Catanzaro, Bryan and Molchanov, Pavlo},
  title     = {{RADIOv2.5}: Improved Baselines for Agglomerative Vision Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22487--22497},
}
High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight-
[pdf]
[supp]
[bibtex]@InProceedings{Vincent_2025_CVPR,
  author    = {Vincent, C{\'e}dric and Kim, Taehyoung and Mee{\ss}, Henri},
  title     = {High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1461--1471},
}
Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jinlong and Saltori, Cristiano and Poiesi, Fabio and Sebe, Nicu},
  title     = {Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19390--19400},
}
Generative Gaussian Splatting for Unbounded 3D City Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {Generative {Gaussian} Splatting for Unbounded {3D} City Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6111--6120},
}
SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR,
  author    = {Du, Hao and Wu, Bo and Lu, Yan and Mao, Zhendong},
  title     = {{SVLTA}: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13798--13809},
}
Mixture of Submodules for Domain Adaptive Person Search-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Minsu and Kim, Seungryong and Sohn, Kwanghoon},
  title     = {Mixture of Submodules for Domain Adaptive Person Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13990--14001},
}
Unsupervised Discovery of Facial Landmarks and Head Pose-
[pdf]
[supp]
[bibtex]@InProceedings{Tourani_2025_CVPR,
  author    = {Tourani, Satyajit and Tourani, Siddharth and Mahmood, Arif and Khan, Muhammad Haris},
  title     = {Unsupervised Discovery of Facial Landmarks and Head Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21192--21202},
}
Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Sherry X. and Sra, Misha and Sen, Pradeep},
  title     = {{Instruct-CLIP}: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28513--28522},
}
Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Shouhang and Li, Chenglin and Jiang, Yuankun and Wei, Li and Kan, Nuowen and Zheng, Ziyang and Dai, Wenrui and Zou, Junni and Xiong, Hongkai},
  title     = {Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26440--26450},
}
SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2025_CVPR, author = {Pham, Duc-Hai and Do, Tung and Nguyen, Phong and Hua, Binh-Son and Nguyen, Khoi and Nguyen, Rang}, title = {SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17060-17069} }
Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karmann_2025_CVPR, author = {Karmann, Markus and Urfalioglu, Onay}, title = {Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24518-24528} }
GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zechuan and Yu, Hongshan and Ding, Yihao and Qiao, Jinhao and Azam, Basim and Akhtar, Naveed}, title = {GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27211-27221} }
DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Ziyu and Li, Xiaoguang and Shi, Lingjia and Imanpour, Nasrin and Wang, Song}, title = {DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25346-25356} }
EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Shuoyan and Li, Feng and Tang, Shengeng and Zhao, Yao and Bai, Huihui}, title = {EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17755-17766} }
Seeing A 3D World in A Grain of Sand-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yufan and Ji, Yu and Guo, Yu and Ye, Jinwei}, title = {Seeing A 3D World in A Grain of Sand}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11187-11196} }
Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xinyue and Dai, Zijia and Xu, Wanting and Kneip, Laurent}, title = {Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27103-27112} }
Dynamic Integration of Task-Specific Adapters for Class Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiashuo and Wang, Shaokun and Qian, Bo and He, Yuhang and Wei, Xing and Wang, Qiang and Gong, Yihong}, title = {Dynamic Integration of Task-Specific Adapters for Class Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30545-30555} }
MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Yuxiang and Yan, Qi and Wang, Lele and Li, Ke and Liao, Renjie}, title = {MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17282-17293} }
EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yiming and Kwon, Taein and Streli, Paul and Pollefeys, Marc and Holz, Christian}, title = {EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27727-27738} }
DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morshed_2025_CVPR, author = {Morshed, Mashrur M. and Boddeti, Vishnu}, title = {DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23303-23312} }
Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yuanmin and Zhang, Jue and Qin, Xiaoting and Yu, Jing and Gou, Gaopeng and Xiong, Gang and Lin, Qingwei and Rajmohan, Saravan and Zhang, Dongmei and Wu, Qi}, title = {Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14400-14410} }
UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Wenbo and Wei, Fangyun and Zhou, Lei and Chen, Xi and Luo, Lin and Yi, Xiaohan and Zhang, Yizhong and Liang, Yaobo and Xu, Chang and Lu, Yan and Yang, Jiaolong and Guo, Baining}, title = {UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12199-12208} }
GeoMM: On Geodesic Perspective for Multi-modal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Shibin and Wang, Hang and Ni, Bingbing}, title = {GeoMM: On Geodesic Perspective for Multi-modal Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4776-4786} }
VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Xueqing and Ding, Yuheng and Li, Bingxuan and Lu, Pan and Yin, Da and Chang, Kai-Wei and Peng, Nanyun}, title = {VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9527-9537} }
MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Jingcheng and Guo, Yuxin and Liu, Yichen and Chen, Rui and Lu, Lewei and Wu, Zehuan}, title = {MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22381-22391} }
3D-MVP: 3D Multiview Pretraining for Manipulation-
[pdf]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Shengyi and Mo, Kaichun and Blukis, Valts and Fouhey, David F. and Fox, Dieter and Goyal, Ankit}, title = {3D-MVP: 3D Multiview Pretraining for Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22530-22539} }
Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeonghyeon and Hwang, Sangheum}, title = {Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29979-29988} }
Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Hang and Huang, Jie and Yu, Wei and Tan, Jiangtong and Zou, Zhen and Zhao, Feng}, title = {Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7513-7523} }
Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Zesen and Zhang, Hang and Li, Kehan and Leng, Sicong and Hu, Zhiqiang and Wu, Fei and Zhao, Deli and Li, Xin and Bing, Lidong}, title = {Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10036-10045} }
Mimir: Improving Video Diffusion Models for Precise Text Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Shuai and Gong, Biao and Feng, Yutong and Zheng, Kecheng and Zheng, Dandan and Shi, Shuwei and Shen, Yujun and Chen, Jingdong and Yang, Ming}, title = {Mimir: Improving Video Diffusion Models for Precise Text Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23978-23988} }
UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingyue and Qi, Jiahao and Chen, Chen and Bin, KangCheng and Zhong, Ping}, title = {UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22286-22295} }
Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xingguang and Chimitt, Nicholas and Wang, Xijun and Yuan, Yu and Chan, Stanley H.}, title = {Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2127-2138} }
RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goswami_2025_CVPR, author = {Goswami, Raktim Gautam and Krishnamurthy, Prashanth and LeCun, Yann and Khorrami, Farshad}, title = {RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6930-6939} }
Distraction is All You Need for Multimodal Large Language Model Jailbreaking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zuopeng and Fan, Jiluan and Yan, Anli and Gao, Erdun and Lin, Xin and Li, Tao and Mo, Kanghua and Dong, Changyu}, title = {Distraction is All You Need for Multimodal Large Language Model Jailbreaking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9467-9476} }
Apollo: An Exploration of Video Understanding in Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zohar_2025_CVPR, author = {Zohar, Orr and Wang, Xiaohan and Dubois, Yann and Mehta, Nikhil and Xiao, Tong and Hansen-Estruch, Philippe and Yu, Licheng and Wang, Xiaofang and Juefei-Xu, Felix and Zhang, Ning and Yeung-Levy, Serena and Xia, Xide}, title = {Apollo: An Exploration of Video Understanding in Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18891-18901} }
Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shihan and Zhang, Ji and Zeng, Pengpeng and Gao, Lianli and Song, Jingkuan and Shen, Heng Tao}, title = {Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14723-14732} }
PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Qihan and Chan, Long and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Song, Jie}, title = {PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18369-18378} }
Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Rui and Jin, Shaocheng and Chen, Ziheng and Luo, Xiaoqing and Wu, Xiao-Jun}, title = {Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8289-8298} }
SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cuttano_2025_CVPR, author = {Cuttano, Claudia and Trivigno, Gabriele and Rosi, Gabriele and Masone, Carlo and Averta, Giuseppe}, title = {SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3395-3405} }
MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhengqi and Tucker, Richard and Cole, Forrester and Wang, Qianqian and Jin, Linyi and Ye, Vickie and Kanazawa, Angjoo and Holynski, Aleksander and Snavely, Noah}, title = {MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10486-10496} }
BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Xin and Yaman, Burhaneddin and Cheng, Sheng and Tao, Feng and Mallik, Abhirup and Ren, Liu}, title = {BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1495-1504} }
GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Soohyun and Kim, Seoyeon and Lee, HeeKyung and Jeong, Won-Sik and Lee, Joo Ho}, title = {GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21138-21147} }
FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Dian and Shi, Mingfei and Xu, Shengda and Chen, Haodong and Huang, Yongle and Wang, Binglu}, title = {FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1905-1916} }
DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Seungjun and Lee, Gim Hee}, title = {DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21739-21749} }
Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives-
[pdf]
[supp]
[bibtex]@InProceedings{Hanson_2025_CVPR, author = {Hanson, Alex and Tu, Allen and Lin, Geng and Singla, Vasu and Zwicker, Matthias and Goldstein, Tom}, title = {Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21537-21546} }
SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jianyi and Lin, Zhijie and Wei, Meng and Zhao, Yang and Yang, Ceyuan and Loy, Chen Change and Jiang, Lu}, title = {SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2161-2172} }
Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Nan and Chen, Haoyu and Xu, Yiran and Qian, Zhenxing and Zhang, Xinpeng}, title = {Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8258-8268} }
Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Nannan and Li, Yijiang and Du, Dong and Chong, Zheng and Sun, Zhengwentai and Zeng, Jianhao and Dai, Yusheng and Xie, Zhengyu and Zhu, Hairui and Han, Xiaoguang}, title = {Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16029-16039} }
Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Plizzari_2025_CVPR, author = {Plizzari, Chiara and Tonioni, Alessio and Xian, Yongqin and Kulshrestha, Achin and Tombari, Federico}, title = {Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24129-24138} }
ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zetong and Kaufmann, Manuel and Xue, Lixin and Song, Jie and Oswald, Martin R.}, title = {ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21824-21835} }
Identity-Preserving Text-to-Video Generation by Frequency Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Shenghai and Huang, Jinfa and He, Xianyi and Ge, Yunyang and Shi, Yujun and Chen, Liuhan and Luo, Jiebo and Yuan, Li}, title = {Identity-Preserving Text-to-Video Generation by Frequency Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12978-12988} }
FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jinxi and Song, Ziyang and Zhou, Siyuan and Yang, Bo}, title = {FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12433-12443} }
SpiritSight Agent: Advanced GUI Agent with One Look-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhiyuan and Cheng, Ziming and Pan, Junting and Hou, Zhaohui and Zhan, Mingjie}, title = {SpiritSight Agent: Advanced GUI Agent with One Look}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29490-29500} }
Zero-Shot Monocular Scene Flow Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yiqing and Badki, Abhishek and Su, Hang and Tompkin, James and Gallo, Orazio}, title = {Zero-Shot Monocular Scene Flow Estimation in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21031-21044} }
MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bizhu and Xie, Jinheng and Shen, Keming and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin}, title = {MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27849-27858} }
Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation-
[pdf]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Yuheng and Wen, Changsong and Peng, Zelin and jiaye, Li and Zhu, Siyu}, title = {Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24895-24904} }
MMRL: Multi-Modal Representation Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuncheng and Gu, Xiaodong}, title = {MMRL: Multi-Modal Representation Learning for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25015-25025} }
Optical-Flow Guided Prompt Optimization for Coherent Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Hyelin and Kim, Jaemin and Lee, Dohun and Ye, Jong Chul}, title = {Optical-Flow Guided Prompt Optimization for Coherent Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7837-7846} }
MOS: Modeling Object-Scene Associations in Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zhengyuan and Ma, Jinpeng and Sun, Zhimin and Yi, Ran and Song, Haichuan and Tan, Xin and Ma, Lizhuang}, title = {MOS: Modeling Object-Scene Associations in Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15118-15128} }
Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Jiawei and Wang, Hongxing and Weng, Junwu and Li, Jiaxin and Ou, Zhilong and Dang, Kang}, title = {Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24180-24189} }
Test-time Augmentation Improves Efficiency in Conformal Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shanmugam_2025_CVPR, author = {Shanmugam, Divya and Lu, Helen and Sankaranarayanan, Swami and Guttag, John}, title = {Test-time Augmentation Improves Efficiency in Conformal Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20622-20631} }
Breaking the Low-Rank Dilemma of Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Qihang and Huang, Huaibo and He, Ran}, title = {Breaking the Low-Rank Dilemma of Linear Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25271-25280} }
StoryGPT-V: Large Language Models as Consistent Story Visualizers-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Xiaoqian and Elhoseiny, Mohamed}, title = {StoryGPT-V: Large Language Models as Consistent Story Visualizers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13273-13283} }
Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Enshen and Su, Qi and Chi, Cheng and Zhang, Zhizheng and Wang, Zhongyuan and Huang, Tiejun and Sheng, Lu and Wang, He}, title = {Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6919-6929} }
Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jinpeng and Luo, Tianci and Zha, Yaohua and Feng, Yan and Luo, Ruisheng and Chen, Bin and Dai, Tao and Chen, Long and Wang, Yaowei and Xia, Shu-Tao}, title = {Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25156-25165} }
Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond-
[pdf]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Tengyu and Ma, Long and Li, Ziye and Wang, Yuetong and Liu, Jinyuan and Xu, Chengpei and Liu, Risheng}, title = {Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2323-2332} }
Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning-
[pdf]
[bibtex]@InProceedings{Hadji_2025_CVPR, author = {Hadji, Isma and Noroozi, Mehdi and Escorcia, Victor and Zaganidis, Anestis and Martinez, Brais and Tzimiropoulos, Georgios}, title = {Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12789-12798} }
Unity in Diversity: Video Editing via Gradient-Latent Purification-
[pdf]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Junyu and Yang, Kunlin and Yao, Xuan and Hu, Yufan}, title = {Unity in Diversity: Video Editing via Gradient-Latent Purification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23401-23411} }
Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hongda and Liu, Yunfan and Ren, Min and Wang, Hao and Wang, Yunlong and Sun, Zhenan}, title = {Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29248-29257} }
Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Dages_2025_CVPR, author = {Dag\`es, Thomas and Weber, Simon and Lin, Ya-Wei Eileen and Talmon, Ronen and Cremers, Daniel and Lindenbaum, Michael and Bruckstein, Alfred M. and Kimmel, Ron}, title = {Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25842-25853} }
VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Songhao and Huang, Wei and Shi, Hairong and Zhuo, Le and Su, Xiu and Zhang, Shifeng and Zhou, Xu and Qi, Xiaojuan and Liao, Yue and Liu, Si}, title = {VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26181-26191} }
INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yongming and Zhang, Longhao and Rong, Zhengkun and Hu, Tianshu and Liang, Shuang and Ge, Zhipeng}, title = {INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10667-10677} }
Federated Learning with Domain Shift Eraser-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zheng and Wang, Zihui and Wang, Zheng and Fan, Xiaoliang and Wang, Cheng}, title = {Federated Learning with Domain Shift Eraser}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4978-4987} }
Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Lahlali_2025_CVPR, author = {Lahlali, Saad and Kara, Sandra and Ammar, Hejer and Chabot, Florian and Granger, Nicolas and Le Borgne, Herv\'e and Pham, Quoc-Cuong}, title = {Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24529-24538} }
DiTCtrl: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Minghong and Cun, Xiaodong and Li, Xiaoyu and Liu, Wenze and Zhang, Zhaoyang and Zhang, Yong and Shan, Ying and Yue, Xiangyu}, title = {{DiTCtrl}: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7763--7772} }
Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Chang, Yi and Yan, Luxin}, title = {Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27904--27913} }
EVPGS: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiahe and Wang, Feiyu and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Liu, Ting}, title = {{EVPGS}: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16398--16407} }
GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Yawen and Zhai, Wei and Yang, Yuhang and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun}, title = {{GREAT}: Geometry-Intention Collaborative Inference for Open-Vocabulary {3D} Object Affordance Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17326--17336} }
Link to the Past: Temporal Propagation for Fast 3D Human Reconstruction from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marchellus_2025_CVPR, author = {Marchellus, Matthew and Noor, Nadhira and Park, In Kyu}, title = {Link to the Past: Temporal Propagation for Fast {3D} Human Reconstruction from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6190--6199} }
Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yanghao and Chen, Long}, title = {Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25560--25569} }
Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiate and Pang, Meng and Dong, Yun and Wang, Binghui}, title = {Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5020--5029} }
A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xuan and Gao, Xitong and Liao, Dongping and Qin, Tianrui and Lu, Yu-liang and Xu, Cheng-zhong}, title = {{A3}: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9507--9516} }
Adapting Pre-trained 3D Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Baixuan and Zha, Yaohua and Dai, Tao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao}, title = {Adapting Pre-trained {3D} Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12413--12422} }
MASH-VLM: Mitigating Action-Scene Hallucination in Video-LLMs through Disentangled Spatial-Temporal Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Bae_2025_CVPR, author = {Bae, Kyungho and Kim, Jinhyung and Lee, Sihaeng and Lee, Soonyoung and Lee, Gunhee and Choi, Jinwoo}, title = {{MASH-VLM}: Mitigating Action-Scene Hallucination in {Video-LLMs} through Disentangled Spatial-Temporal Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13744--13753} }
UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, Fran{\c{c}}ois and Jones, Michael Jeffrey and Chatterjee, Moitreya}, title = {{UWAV}: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13561--13570} }
Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jing and Zhou, Yuhang and Qian, Shengyi and He, Zhongmou and Zhao, Tong and Shah, Neil and Koutra, Danai}, title = {Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14215--14224} }
TSP-Mamba: The Travelling Salesman Problem Meets Mamba for Image Super-resolution and Beyond-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Kun and Lin, Xinyu and Lu, Jiangbo}, title = {{TSP-Mamba}: The Travelling Salesman Problem Meets {Mamba} for Image Super-resolution and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28134--28143} }
MVPaint: Synchronized Multi-View Diffusion for Painting Anything 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Wei and Mu, Juncheng and Zeng, Xianfang and Chen, Xin and Pang, Anqi and Zhang, Chi and Wang, Zhibin and Fu, Bin and Yu, Gang and Liu, Ziwei and Pan, Liang}, title = {{MVPaint}: Synchronized Multi-View Diffusion for Painting Anything {3D}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {585--594} }
FirePlace: Geometric Refinements of LLM Common Sense Reasoning for 3D Object Placement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Ian and Bao, Yanan and Truong, Karen and Zhou, Howard and Schmid, Cordelia and Guibas, Leonidas and Fathi, Alireza}, title = {{FirePlace}: Geometric Refinements of {LLM} Common Sense Reasoning for {3D} Object Placement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13466--13476} }
RENO: Real-Time Neural Compression for 3D LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Kang and Chen, Tong and Ding, Dandan and Asif, M. Salman and Ma, Zhan}, title = {{RENO}: Real-Time Neural Compression for {3D} {LiDAR} Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22172--22181} }
End-to-End Implicit Neural Representations for Classification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gielisse_2025_CVPR, author = {Gielisse, Alexander and van Gemert, Jan}, title = {End-to-End Implicit Neural Representations for Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18728--18737} }
ASAP: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhenxing and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Guo, Dan and Wang, Meng}, title = {{ASAP}: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4005--4014} }
UNICL-SAM: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2025_CVPR, author = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Gong, Tao and Liu, Bin and Han, Jing and Tu, Wenbin and Xu, Shengwei and Yu, Nenghai}, title = {{UNICL-SAM}: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20201--20211} }
Layered Motion Fusion: Lifting Motion Segmentation to 3D in Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Tschernezki_2025_CVPR, author = {Tschernezki, Vadim and Larlus, Diane and Laina, Iro and Vedaldi, Andrea}, title = {Layered Motion Fusion: Lifting Motion Segmentation to {3D} in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17637--17648} }
FADE: Frequency-Aware Diffusion Model Factorization for Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yixuan and Wang, Haolin and Ma, Shilin and Zhao, Wenliang and Tang, Yansong and Chen, Lei and Zhou, Jie}, title = {{FADE}: Frequency-Aware Diffusion Model Factorization for Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28426--28435} }
MotiF: Making Text Count in Image Animation with Motion Focal Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shijie and Azadi, Samaneh and Girdhar, Rohit and Rambhatla, Saketh and Sun, Chen and Yin, Xi}, title = {{MotiF}: Making Text Count in Image Animation with Motion Focal Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7773--7783} }
Towards Explicit Geometry-Reflectance Collaboration for Generalized LiDAR Segmentation in Adverse Weather-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Longyu and Hu, Ping and Yuan, Shangbo and Zhang, Lu and Liu, Jun and Shen, Hengtao and Zhu, Xiaofeng}, title = {Towards Explicit Geometry-Reflectance Collaboration for Generalized {LiDAR} Segmentation in Adverse Weather}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {139--149} }
Data Synthesis with Diverse Styles for Face Recognition via 3DMM-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2025_CVPR, author = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Yuan, Qiuyang and Zhao, Xuan and Xu, Jianqing and Ding, Shouhong and Wang, Shaoming and Guo, Rizen and Zhou, Shuigeng}, title = {Data Synthesis with Diverse Styles for Face Recognition via {3DMM}-Guided Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21203--21214} }
Diffusion Self-Distillation for Zero-Shot Customized Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Shengqu and Chan, Eric Ryan and Zhang, Yunzhi and Guibas, Leonidas and Wu, Jiajun and Wetzstein, Gordon}, title = {Diffusion Self-Distillation for Zero-Shot Customized Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18434--18443} }
Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Leheng and You, Weiyi and Shi, Kexuan and Gu, Shuhang}, title = {Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17980--17989} }
Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Yanbiao and Dai, Wei and Huang, Wenke and Chen, Jiayi}, title = {Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20958--20968} }
Towards Human-Understandable Multi-Dimensional Concept Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Grobrugge_2025_CVPR, author = {Grobr{\"u}gge, Arne and K{\"u}hl, Niklas and Satzger, Gerhard and Spitzer, Philipp}, title = {Towards Human-Understandable Multi-Dimensional Concept Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20018--20027} }
GlyphMastero: A Glyph Encoder for High-Fidelity Scene Text Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tong and Liu, Ting and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin}, title = {{GlyphMastero}: A Glyph Encoder for High-Fidelity Scene Text Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28523--28532} }
ConText-CIR: Learning from Concepts in Text for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Eric and Kolouju, Pranavi and Pless, Robert and Stylianou, Abby and Jacobs, Nathan}, title = {{ConText-CIR}: Learning from Concepts in Text for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19638--19648} }
MaskGaussian: Adaptive 3D Gaussian Representation from Probabilistic Masks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yifei and Zhong, Zhihang and Zhan, Yifan and Xu, Sheng and Sun, Xiao}, title = {{MaskGaussian}: Adaptive {3D} {Gaussian} Representation from Probabilistic Masks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {681--690} }
Perturb-and-Revise: Flexible 3D Editing with Generative Trajectories-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Susung and Karras, Johanna and Martin-Brualla, Ricardo and Kemelmacher-Shlizerman, Ira}, title = {{Perturb-and-Revise}: Flexible {3D} Editing with Generative Trajectories}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16293--16303} }
Birth and Death of a Rose-
[pdf]
[arXiv]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Chen and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun}, title = {Birth and Death of a Rose}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26102--26113} }
Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yushuai and Zhou, Zikun and Jiang, Dongmei and Wang, Yaowei and Yu, Jun and Lu, Guangming and Pei, Wenjie}, title = {Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15255--15264} }
SoundVista: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Mingfei and Gebru, Israel D. and Ananthabhotla, Ishwarya and Richardt, Christian and Markovic, Dejan and Sandakly, Jake and Krenn, Steven and Keebler, Todd and Shlizerman, Eli and Richard, Alexander}, title = {{SoundVista}: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8331--8341} }
CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abbasi_2025_CVPR, author = {Abbasi, Reza and Nazari, Ali and Sefid, Aminreza and Banayeeanzade, Mohammadali and Rohban, Mohammad Hossein and Baghshah, Mahdieh Soleymani}, title = {{CLIP} Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9308--9317} }
MetricGrids: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shu and Gao, Yanbo and Li, Shuai and Lv, Chong and Cai, Xun and Li, Chuankun and Yuan, Hui and Zhang, Jinglin}, title = {{MetricGrids}: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21381--21391} }
Navigating Image Restoration with VAR's Distribution Alignment Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Siyang and Zheng, Naishan and Huang, Jie and Zhao, Feng}, title = {Navigating Image Restoration with {VAR}'s Distribution Alignment Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7559--7569} }
MovieBench: A Hierarchical Movie Level Dataset for Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Weijia and Liu, Mingyu and Zhu, Zeyu and Xia, Xi and Feng, Haoen and Wang, Wen and Lin, Kevin Qinghong and Shen, Chunhua and Shou, Mike Zheng}, title = {{MovieBench}: A Hierarchical Movie Level Dataset for Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28984--28994} }
Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Yingdong and Li, Changming and Wang, Yifan and Zhao, Yongxiang and Pang, Anqi and Yang, Sibei and Yu, Jingyi and Ren, Kan}, title = {Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8192--8202} }
Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision-
[pdf]
[bibtex]@InProceedings{Dampfhoffer_2025_CVPR, author = {Dampfhoffer, Manon and Mesquida, Thomas and Joubert, Damien and Dalgaty, Thomas and Vivet, Pascal and Posch, Christoph}, title = {Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6909--6918} }
Be More Specific: Evaluating Object-centric Realism in Synthetic Images-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Anqi and Corneanu, Ciprian and Feng, Qianli and Giannone, Giorgio and Martinez, Aleix}, title = {Be More Specific: Evaluating Object-centric Realism in Synthetic Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28842--28851} }
Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Lei-Lei and Xu, Shuo and Xie, Ming-Kun and Wang, Lei and Sun, Dengdi and Zhao, Haifeng}, title = {Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25434--25443} }
LoRACLR: Contrastive Adaptation for Customization of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Simsar_2025_CVPR, author = {Simsar, Enis and Hofmann, Thomas and Tombari, Federico and Yanardag, Pinar}, title = {{LoRACLR}: Contrastive Adaptation for Customization of Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13189--13198} }
ArtFormer: Controllable Generation of Diverse 3D Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Jiayi and Feng, Youhe and Li, Zheng and Song, Jinhua and He, Yangfan and Ren, Botao and Xu, Botian}, title = {{ArtFormer}: Controllable Generation of Diverse {3D} Articulated Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1894--1904} }
Opportunistic Single-Photon Time of Flight-
[pdf]
[supp]
[bibtex]@InProceedings{Nousias_2025_CVPR, author = {Nousias, Sotiris and Wei, Mian and Xiao, Howard and Wu, Maxx and Athar, Shahmeer and Wang, Kevin J. and Malik, Anagh and Barmherzig, David A. and Lindell, David B. and Kutulakos, Kyros N.}, title = {Opportunistic Single-Photon Time of Flight}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15852--15862} }
Bridging Gait Recognition and Large Language Models Sequence Modeling-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Shaopeng and Wang, Jilong and Hou, Saihui and Liu, Xu and Cao, Chunshui and Wang, Liang and Huang, Yongzhen}, title = {Bridging Gait Recognition and Large Language Models Sequence Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3460--3469} }
Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2025_CVPR, author = {Man, Yunze and Huang, De-An and Liu, Guilin and Sheng, Shiwei and Liu, Shilong and Gui, Liang-Yan and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding}, title = {Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14268--14280} }
Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jungin and Lee, Jiyoung and Sohn, Kwanghoon}, title = {Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13661--13670} }
SFDM: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Daisheng and Hu, Jiangbei and Xu, Baixin and Dai, Yuxin and Qian, Chen and He, Ying}, title = {{SFDM}: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26409--26419} }
DiSRT-In-Bed: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Jing and Zheng, Ce and Jeni, Laszlo A. and Erickson, Zackory}, title = {{DiSRT-In-Bed}: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1829--1838} }
Ouroboros3D: Image-to-3D Generation via 3D-aware Recursive Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Hao and Huang, Zehuan and Wang, Yaohui and Chen, Xinyuan and Sheng, Lu}, title = {{Ouroboros3D}: Image-to-{3D} Generation via {3D}-aware Recursive Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21631--21641} }
QMambaBSR: Burst Image Super-Resolution with Query State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Di_2025_CVPR, author = {Di, Xin and Peng, Long and Xia, Peizhe and Li, Wenbo and Pei, Renjing and Cao, Yang and Wang, Yang and Zha, Zheng-Jun}, title = {{QMambaBSR}: Burst Image Super-Resolution with Query State Space Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23080--23090} }
Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Tongtong and Wang, Chengyu and Liu, Bingyan and Huang, Jun and Lu, Dongming}, title = {Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18209--18218} }
Multi-Group Proportional Representations for Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Sangwon and Oesterling, Alex and Verdun, Claudio Mayrink and Vithana, Sajani and Moon, Taesup and Calmon, Flavio P.}, title = {Multi-Group Proportional Representations for Text-to-Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23744--23754} }
Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Messaoud_2025_CVPR, author = {Messaoud, Kaouther and Cord, Matthieu and Alahi, Alexandre}, title = {Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27564--27574} }
CoMatcher: Multi-View Collaborative Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jintao and Xia, Zimin and Dong, Mingyue and Shen, Shuhan and Yue, Linwei and Zheng, Xianwei}, title = {{CoMatcher}: Multi-View Collaborative Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21970--21980} }
COUNTS: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiansheng and Zhang, Xingxuan and Zou, Hao and Guo, Yige and Xu, Renzhe and Liu, Yilong and Zhu, Chuzhao and He, Yue and Cui, Peng}, title = {{COUNTS}: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9186--9198} }
Retrieving Semantics from the Deep: an RAG Solution for Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2025_CVPR, author = {Mughal, M. Hamza and Dabral, Rishabh and Scholman, Merel C. J. and Demberg, Vera and Theobalt, Christian}, title = {Retrieving Semantics from the Deep: an {RAG} Solution for Gesture Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16578--16588} }
HOT: Hadamard-based Optimized Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Seonggon and Shin, Juncheol and Woo, Seung-taek and Park, Eunhyeok}, title = {{HOT}: {Hadamard}-based Optimized Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4787--4796} }
Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully AI-Generated Content-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kundu_2025_CVPR, author = {Kundu, Rohit and Xiong, Hao and Mohanty, Vishal and Balachandran, Athula and Roy-Chowdhury, Amit K.}, title = {Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully {AI}-Generated Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28050--28060} }
TokenFlow: Unified Image Tokenizer for Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Liao and Zhang, Huichao and Liu, Yiheng and Wang, Xu and Jiang, Yi and Gao, Yiming and Ye, Hu and Du, Daniel K. and Yuan, Zehuan and Wu, Xinglong}, title = {{TokenFlow}: Unified Image Tokenizer for Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2545--2555} }
Improving Personalized Search with Regularized Low-Rank Parameter Updates-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2025_CVPR, author = {Ryan, Fiona and Sivic, Josef and Heilbron, Fabian Caba and Hoffman, Judy and Rehg, James M. and Russell, Bryan}, title = {Improving Personalized Search with Regularized Low-Rank Parameter Updates}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19748--19757} }
A Focused Human Body Model for Accurate Anthropometric Measurements Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Shuhang and Huang, Xianliang and Zhong, Zhizhou and Guan, Juhong and Zhou, Shuigeng}, title = {A Focused Human Body Model for Accurate Anthropometric Measurements Extraction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22658--22667} }
SnapGen-V: Generating a Five-Second Video within Five Seconds on a Mobile Device-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yushu and Zhang, Zhixing and Li, Yanyu and Xu, Yanwu and Kag, Anil and Sui, Yang and Coskun, Huseyin and Ma, Ke and Lebedev, Aleksei and Hu, Ju and Metaxas, Dimitris N. and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian}, title = {{SnapGen-V}: Generating a Five-Second Video within Five Seconds on a Mobile Device}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2479--2490} }
Adapting Dense Matching for Homography Estimation with Grid-based Acceleration-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Kaining and Deng, Yuxin and Ma, Jiayi and Favaro, Paolo}, title = {Adapting Dense Matching for Homography Estimation with Grid-based Acceleration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6294-6303} }
HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Mengtian and Chen, Jinshu and Feng, Wanquan and Li, Bingchuan and Dai, Fei and Zhao, Songtao and He, Qian}, title = {HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13114-13123} }
ACE: Anti-Editing Concept Erasure in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zihao and Wei, Yuxiang and Li, Fan and Pei, Renjing and Xu, Hang and Zuo, Wangmeng}, title = {ACE: Anti-Editing Concept Erasure in Text-to-Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23505-23515} }
EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yizheng and Ehm, Viktoria and Roetzer, Paul and El Amrani, Nafie and Gao, Maolin and Bernard, Florian and Cremers, Daniel}, title = {EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11665-11675} }
CoSDH: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Junhao and Zhang, Yanan and Cai, Zhi and Huang, Di}, title = {CoSDH: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6834-6843} }
Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bartolomei_2025_CVPR, author = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano}, title = {Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1013-1027} }
Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Guannan and Li, Yujie and Wang, Xiangkun and Zhang, Junbo and Li, Tianrui and Yang, Xin}, title = {Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4894-4904} }
Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Shuaiwei and Dong, Junyu and Li, Yuezun}, title = {Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8764-8774} }
Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Weilong and Li, Ming and Li, Haipeng and Shao, Shuwei and Tan, Robby T.}, title = {Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21880-21890} }
CaMuViD: Calibration-Free Multi-View Detection-
[pdf]
[bibtex]@InProceedings{Daryani_2025_CVPR, author = {Daryani, Amir Etefaghi and Bhutta, M. Usman Maqbool and Hernandez, Byron and Medeiros, Henry}, title = {CaMuViD: Calibration-Free Multi-View Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1220-1229} }
Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhedong and Li, Liang and Yan, Chenggang and Liu, Chunshan and van den Hengel, Anton and Qi, Yuankai}, title = {Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {172-182} }
Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qiang and Zhao, Mengsheng and Liu, Jiawei and Zhang, Fanrui and Xu, Yongchao and Zha, Zheng-Jun}, title = {Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30524-30533} }
Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Jie and Lin, Chenyu and Qiu, Yu and Wang, Yaonan and Zhang, Hui and Wang, Ziyang and Dai, Dong}, title = {Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15653-15662} }
LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Jian and Yu, Zhenbo and Shen, Yang and Fu, Zhenyong and Yang, Jian}, title = {LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23585-23594} }
DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Ho_2025_CVPR, author = {Ho, Darryl and Madden, Samuel}, title = {DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24023-24032} }
HVI: A New Color Space for Low-light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Qingsen and Feng, Yixu and Zhang, Cheng and Pang, Guansong and Shi, Kangbiao and Wu, Peng and Dong, Wei and Sun, Jinqiu and Zhang, Yanning}, title = {HVI: A New Color Space for Low-light Image Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5678-5687} }
DualPM: Dual Posed-Canonical Point Maps for 3D Shape and Pose Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaye_2025_CVPR, author = {Kaye, Ben and Jakab, Tomas and Wu, Shangzhe and Ruprecht, Christian and Vedaldi, Andrea}, title = {DualPM: Dual Posed-Canonical Point Maps for 3D Shape and Pose Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6425-6435} }
SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Yi and Zhao, Zhipeng and Su, Shaoshu and Golluri, Sharath and Zheng, Haoze and Yao, Runmao and Wang, Chen}, title = {SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16953-16964} }
One Diffusion to Generate Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2025_CVPR, author = {Le, Duong H. and Pham, Tuan and Lee, Sangho and Clark, Christopher and Kembhavi, Aniruddha and Mandt, Stephan and Krishna, Ranjay and Lu, Jiasen}, title = {One Diffusion to Generate Them All}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2671-2682} }
Let's Verify and Reinforce Image Generation Step by Step-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Renrui and Tong, Chengzhuo and Zhao, Zhizheng and Guo, Ziyu and Zhang, Haoquan and Zhang, Manyuan and Liu, Jiaming and Gao, Peng and Li, Hongsheng}, title = {Let's Verify and Reinforce Image Generation Step by Step}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28662-28672} }
All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Xiaoling and Lee, Zhemg and Ye, Wei and Xie, Rui and Zhang, Wenbo and Peng, Guanju and Li, Zongze and Zhang, Shikun}, title = {All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28221-28231} }
Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Chenggong and Lyu, Fan and Tan, Jiayao and Hu, Fuyuan and Yao, Rui and Zhou, Tao}, title = {Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15319-15328} }
UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingyu and Wang, Gu and Zhang, Ruida and Zhang, Chenyangguang and Tombari, Federico and Ji, Xiangyang}, title = {UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22023-22034} }
CoSER: Towards Consistent Dense Multiview Text-to-Image Generator for 3D Creation-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Bonan and Zhang, Zicheng and Yang, Xingyi and Wang, Xinchao}, title = {CoSER: Towards Consistent Dense Multiview Text-to-Image Generator for 3D Creation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2880-2890} }
HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarvestani_2025_CVPR, author = {Sarvestani, Armin Shafiee and Tang, Sheyang and Wang, Zhou}, title = {HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21414-21424} }
Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Changhao}, title = {Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11779-11788} }
Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jian and Wang, He and Xu, Yang and Wu, Zebin and Wei, Zhihui}, title = {Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17862-17871} }
SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Yucheng and Wang, Boyang and Kulkarni, Nilesh and Park, Jeong Joon}, title = {SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21620-21630} }
StickMotion: Generating 3D Human Motions by Drawing a Stickman-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tao and Wu, Zhihua and He, Qiaozhi and Chu, Jiaming and Qian, Ling and Cheng, Yu and Xing, Junliang and Zhao, Jian and Jin, Lei}, title = {StickMotion: Generating 3D Human Motions by Drawing a Stickman}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12370-12379} }
Reversible Decoupling Network for Single Image Reflection Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hao and Li, Mingjia and Hu, Qiming and Guo, Xiaojie}, title = {Reversible Decoupling Network for Single Image Reflection Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26430-26439} }
Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Xinhao and Fang, Hao and Chen, Bin and Gu, Xulin and Qiu, Meikang and Qi, Shuhan and Xia, Shu-Tao}, title = {Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30462-30471} }
Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Yi and Han, Weizhen and Wu, Libing and Liu, Bingyi and Wang, Enshu and Zhang, Zhuangzhuang}, title = {Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17229-17238} }
GLASS: Guided Latent Slot Diffusion for Object-Centric Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Singh_2025_CVPR, author = {Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan}, title = {GLASS: Guided Latent Slot Diffusion for Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28673-28683} }
UNEM: UNrolled Generalized EM for Transductive Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Long and Shakeri, Fereshteh and Sadraoui, Aymen and Kaaniche, Mounir and Pesquet, Jean-Christophe and Ben Ayed, Ismail}, title = {UNEM: UNrolled Generalized EM for Transductive Few-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9665-9675} }
SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jinfeng and Li, Xianzhi and Tang, Yuan and Han, Xu and Yu, Qiao and Hao, Yixue and Hu, Long and Chen, Min}, title = {SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27295-27304} }
Low-Biased General Annotated Dataset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Dengyang and Wang, Haoyu and Zhang, Lei and Wei, Wei and Dai, Guang and Wang, Mengmeng and Wang, Jingdong and Zhang, Yanning}, title = {Low-Biased General Annotated Dataset Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25113-25123} }
G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Tianxing and Mu, Yao and Liang, Zhixuan and Chen, Zanxin and Peng, Shijia and Chen, Qiangyu and Xu, Mingkun and Hu, Ruizhen and Zhang, Hongyuan and Li, Xuelong and Luo, Ping}, title = {G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1735-1744} }
Generative Hard Example Augmentation for Semantic Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qi and Peng, Jibin and Huang, Zhao and Feng, Wei and Lin, Di}, title = {Generative Hard Example Augmentation for Semantic Point Cloud Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22205-22214} }
Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Du and Wu, Tianhe and Ma, Kede and Zhang, Lei}, title = {Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12742-12752} }
Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Zequn and Su, Yudi and Sun, Jianqiao and Wen, Tiansheng and Zhang, Hao and Wang, Zhengjue and Chen, Bo and Liu, Hongwei and Ma, Jiawei}, title = {Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9517-9526} }
Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction-
[pdf]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Ning and Zhang, Libao}, title = {Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12690-12699} }
Textured Gaussians for Enhanced 3D Scene Appearance Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chao_2025_CVPR, author = {Chao, Brian and Tseng, Hung-Yu and Porzi, Lorenzo and Gao, Chen and Li, Tuotuo and Li, Qinbo and Saraf, Ayush and Huang, Jia-Bin and Kopf, Johannes and Wetzstein, Gordon and Kim, Changil}, title = {Textured Gaussians for Enhanced 3D Scene Appearance Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8964-8974} }
NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Zengrong and Wang, Zheng and Qian, Tianwen and Mu, Pan and Chan, Sixian and Bai, Cong}, title = {NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9263-9273} }
ETAP: Event-based Tracking of Any Point-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamann_2025_CVPR, author = {Hamann, Friedhelm and Gehrig, Daniel and Febryanto, Filbert and Daniilidis, Kostas and Gallego, Guillermo}, title = {ETAP: Event-based Tracking of Any Point}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27186-27196} }
Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yaqi and Yin, Yuanyang and Li, Lin and Lin, Mingan and Huang, Victor Shea-Jay and Chen, Siwei and Chen, Weipeng and Yin, Baoqun and Zhou, Zenan and Zhang, Wentao}, title = {Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24950-24959} }
Global-Local Tree Search in VLMs for 3D Indoor Scene Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Wei and Qi, Mengshi and Ma, Huadong}, title = {Global-Local Tree Search in VLMs for 3D Indoor Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8975-8984} }
Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Esposito_2025_CVPR, author = {Esposito, Stefano and Chen, Anpei and Reiser, Christian and Bul\`o, Samuel Rota and Porzi, Lorenzo and Schwarz, Katja and Richardt, Christian and Zollh\"ofer, Michael and Kontschieder, Peter and Geiger, Andreas}, title = {Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21370-21380} }
Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhuo and Xiang, Xiang and Liang, Yifan}, title = {Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15402-15412} }
GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Haoqiang and Sachdeva, Enna and Gupta, Piyush and Bae, Sangjae and Lee, Kwonjoon}, title = {GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3815-3825} }
STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Yuning and Wang, Andong and Li, Chao and Huang, Haonan and Zhou, Guoxu and Zhao, Qibin}, title = {STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28640-28650} }
RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Chan Hee and Blukis, Valts and Tremblay, Jonathan and Tyree, Stephen and Su, Yu and Birchfield, Stan}, title = {RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15768-15780} }
VIRES: Video Instance Repainting via Sketch and Text Guided Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2025_CVPR, author = {Weng, Shuchen and Zheng, Haojie and Zhang, Peixuan and Hong, Yuchen and Jiang, Han and Li, Si and Shi, Boxin}, title = {VIRES: Video Instance Repainting via Sketch and Text Guided Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28416-28425} }
MAP: Unleashing Hybrid Mamba-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yunze and Yi, Li}, title = {MAP: Unleashing Hybrid Mamba-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9676-9685} }
Segment Any-Quality Images with Generative Latent Space Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Guangqian and Guo, Yong and Yu, Xuehui and Li, Wenbo and Wang, Yaoxing and Gao, Shan}, title = {Segment Any-Quality Images with Generative Latent Space Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2366-2376} }
BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Minye and Dai, Haizhao and Yao, Kaixin and Tuytelaars, Tinne and Yu, Jingyi}, title = {BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16197-16207} }
MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Men_2025_CVPR, author = {Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng}, title = {MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21181-21191} }
TKG-DM: Training-free Chroma Key Content Generation Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Morita_2025_CVPR, author = {Morita, Ryugo and Frolov, Stanislav and Moser, Brian Bernhard and Shirakawa, Takahiro and Watanabe, Ko and Dengel, Andreas and Zhou, Jinjia}, title = {TKG-DM: Training-free Chroma Key Content Generation Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13031-13040} }
Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Yueru and Liu, Jiaming and Chen, Sixiang and Gu, Chenyang and Wang, Zhilve and Luo, Longzan and Li, Xiaoqi and Wang, Pengwei and Wang, Zhongyuan and Zhang, Renrui and Zhang, Shanghang}, title = {Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17347-17358} }
Multi-View Pose-Agnostic Change Localization with Zero Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galappaththige_2025_CVPR, author = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Sunderhauf, Niko and Miller, Dimity}, title = {Multi-View Pose-Agnostic Change Localization with Zero Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11600-11610} }
From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhiwei and Yu, Hailin and Shentu, Yichun and Yuan, Jin and Zhang, Guofeng}, title = {From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27059-27069} }
Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhiyuan and Li, Keyi and Jia, Yifan and Ye, Le and Ma, Yufei}, title = {Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18011-18020} }
CityWalker: Learning Embodied Urban Navigation from Web-Scale Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xinhao and Li, Jintong and Jiang, Yicheng and Sujay, Niranjan and Yang, Zhicheng and Zhang, Juexiao and Abanes, John and Zhang, Jing and Feng, Chen}, title = {CityWalker: Learning Embodied Urban Navigation from Web-Scale Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6875-6885} }
A Simple yet Effective Layout Token in Large Language Models for Document Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zhaoqing and Luo, Chuwei and Shao, Zirui and Gao, Feiyu and Xing, Hangdi and Zheng, Qi and Zhang, Ji}, title = {A Simple yet Effective Layout Token in Large Language Models for Document Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14472-14482} }
Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Jingfeng and Yang, Bin and Wang, Xinggang}, title = {Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15703-15712} }
StableAnimator: High-Quality Identity-Preserving Human Image Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Shuyuan and Xing, Zhen and Han, Xintong and Cheng, Zhi-Qi and Dai, Qi and Luo, Chong and Wu, Zuxuan}, title = {StableAnimator: High-Quality Identity-Preserving Human Image Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21096-21106} }
Learning Visual Composition through Improved Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stone_2025_CVPR, author = {Stone, Austin and Soltau, Hagen and Geirhos, Robert and Yi, Xi and Xia, Ye and Cao, Bingyi and Chen, Kaifeng and Ogale, Abhijit and Shlens, Jonathon}, title = {Learning Visual Composition through Improved Semantic Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3740-3750} }
OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yifeng and Zhu, Lin and Sun, Zewen and Liu, Hengyu and Gu, Qinying and Ye, Nanyang}, title = {OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30630-30639} }
MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuhan and Hong, Fangzhou and Yang, Shuai and Jiang, Liming and Wu, Wayne and Loy, Chen Change}, title = {MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11297-11306} }
Free Lunch Enhancements for Multi-modal Crowd Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Haoliang and Hong, Xiaopeng and Lai, Zhengqin and Shang, Miao}, title = {Free Lunch Enhancements for Multi-modal Crowd Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14013-14023} }
BIMBA: Selective-Scan Compression for Long-Range Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2025_CVPR, author = {Islam, Md Mohaiminul and Nagarajan, Tushar and Wang, Huiyu and Bertasius, Gedas and Torresani, Lorenzo}, title = {BIMBA: Selective-Scan Compression for Long-Range Video Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29096-29107} }
EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Sheng and Huang, Jiaxin and Bai, Dongfeng and Yan, Xu and Zhou, Hongyu and Wang, Yue and Liu, Bingbing and Geiger, Andreas and Liao, Yiyi}, title = {EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11286-11296} }
Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Schusterbauer_2025_CVPR, author = {Schusterbauer, Johannes and Gui, Ming and Fundel, Frank and Ommer, Bj\"orn}, title = {Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28347-28357} }
JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Yiyang and Liu, Xingchao and Chen, Xiaokang and Liu, Wen and Wu, Chengyue and Wu, Zhiyu and Pan, Zizheng and Xie, Zhenda and Zhang, Haowei and Yu, Xingkai and Zhao, Liang and Wang, Yisong and Liu, Jiaying and Ruan, Chong}, title = {JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7739-7751} }
Visual Prompting for One-shot Controllable Video Editing without Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhengbo and Zhou, Yuxi and Peng, Duo and Lim, Joo-Hwee and Tu, Zhigang and Soh, De Wen and Foo, Lin Geng}, title = {Visual Prompting for One-shot Controllable Video Editing without Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7784-7794} }
PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Shuangfan and Zhou, Chu and Lyu, Youwei and Guo, Heng and Ma, Zhanyu and Shi, Boxin and Sato, Imari}, title = {PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16081-16090} }
MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Hanwen and Xu, Zexiang and Xie, Desai and Chen, Ziwen and Jin, Haian and Luan, Fujun and Shu, Zhixin and Zhang, Kai and Bi, Sai and Sun, Xin and Gu, Jiuxiang and Huang, Qixing and Pavlakos, Georgios and Tan, Hao}, title = {MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16441-16452} }
Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruan_2025_CVPR, author = {Ruan, Quanyuan and Lei, Jiabao and Yuan, Wenhao and Zhang, Yanglin and Lu, Dekun and Liu, Guiliang and Jia, Kui}, title = {Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22562-22572} }
AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Kaixuan and Li, Xinde and Li, Xinling and Hu, Chuanfei and Wu, Guoliang}, title = {AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3252-3261} }
Flash-Split: 2D Reflection Removal with Flash Cues and Latent Diffusion Separation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tianfu and Xie, Mingyang and Cai, Haoming and Shah, Sachin and Metzler, Christopher A.}, title = {Flash-Split: 2D Reflection Removal with Flash Cues and Latent Diffusion Separation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5688-5698} }
Attention IoU: Examining Biases in CelebA using Attention Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Serianni_2025_CVPR, author = {Serianni, Aaron and Zhu, Tyler and Russakovsky, Olga and Ramaswamy, Vikram V.}, title = {Attention IoU: Examining Biases in CelebA using Attention Maps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4386-4397} }
HEIE: MLLM-Based Hierarchical Explainable AIGC Image Implausibility Evaluator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Fan and Zhen, Ru and Wang, Jianing and Zhang, Yanhao and Chen, Haoxiang and Lu, Haonan and Zhao, Sicheng and Ding, Guiguang}, title = {HEIE: MLLM-Based Hierarchical Explainable AIGC Image Implausibility Evaluator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3856-3866} }
Segment Any Motion in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Nan and Zheng, Wenzhao and Xu, Chenfeng and Keutzer, Kurt and Zhang, Shanghang and Kanazawa, Angjoo and Wang, Qianqian}, title = {Segment Any Motion in Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3406-3416} }
HandOS: 3D Hand Reconstruction in One Stage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xingyu and Song, Zhuheng and Jiang, Xiaoke and Hu, Yaoqing and Yu, Junzhi and Zhang, Lei}, title = {HandOS: 3D Hand Reconstruction in One Stage}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17304-17314} }
Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Wenbo and Xu, Zhen and Xu, Ruotao and Wu, Si and Wong, Hau-San}, title = {Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3931-3941} }
DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Hyunwoo and Ryu, Gun and Kim, Wonjun}, title = {DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21600-21609} }
All-Day Multi-Camera Multi-Target Tracking-
[pdf]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Huijie and Qiao, Yu and Zhen, Yihao and Zhao, Tinghui and Fan, Baojie and Wang, Qiang}, title = {All-Day Multi-Camera Multi-Target Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16892-16901} }
Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behari_2025_CVPR, author = {Behari, Nikhil and Young, Aaron and Somasundaram, Siddharth and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh}, title = {Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26954-26964} }
EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianrong and Fan, Hehe and Yang, Yi}, title = {EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17592-17602} }
PatchDEMUX: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jacob_2025_CVPR, author = {Jacob, Dennis and Xiang, Chong and Mittal, Prateek}, title = {PatchDEMUX: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9944-9953} }
StarVector: Generating Scalable Vector Graphics Code from Images and Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodriguez_2025_CVPR, author = {Rodriguez, Juan A. and Puri, Abhay and Agarwal, Shubham and Laradji, Issam H. and Rodriguez, Pau and Rajeswar, Sai and Vazquez, David and Pal, Christopher and Pedersoli, Marco}, title = {StarVector: Generating Scalable Vector Graphics Code from Images and Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16175-16186} }
Novel View Synthesis with Pixel-Space Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elata_2025_CVPR, author = {Elata, Noam and Kawar, Bahjat and Ostrovsky-Berman, Yaron and Farber, Miriam and Sokolovsky, Ron}, title = {Novel View Synthesis with Pixel-Space Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26756-26766} }
Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiao and Jin, Yu and Wu, Wentao and Zhang, Wei and Zhu, Lin and Jiang, Bo and Tian, Yonghong}, title = {Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29321-29330} }
EgoTextVQA: Towards Egocentric Scene-Text Aware Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Sheng and Xiao, Junbin and Li, Qingyun and Li, Yicong and Yang, Xun and Guo, Dan and Wang, Meng and Chua, Tat-Seng and Yao, Angela}, title = {EgoTextVQA: Towards Egocentric Scene-Text Aware Video Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3363-3373} }
Token Cropr: Faster ViTs for Quite a Few Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bergner_2025_CVPR, author = {Bergner, Benjamin and Lippert, Christoph and Mahendran, Aravindh}, title = {Token Cropr: Faster ViTs for Quite a Few Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9740-9750} }
STCOcc: Sparse Spatial-Temporal Cascade Renovation for 3D Occupancy and Scene Flow Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Zhimin and Wei, Ping and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang}, title = {STCOcc: Sparse Spatial-Temporal Cascade Renovation for 3D Occupancy and Scene Flow Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1516-1526} }
Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jun and Xu, Dannong and Fei, Junjie and Feng, Chun-Mei and Elhoseiny, Mohamed}, title = {Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24817-24826} }
Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Farina_2025_CVPR, author = {Farina, Matteo and Mancini, Massimiliano and Iacca, Giovanni and Ricci, Elisa}, title = {Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29989-29998} }
Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Konyul and Kim, Yecheol and Kim, Daehun and Choi, Jun Won}, title = {Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6720-6729} }
TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhai_2025_CVPR, author = {Zhai, Zhichao and Chen, Guikun and Wang, Wenguan and Zheng, Dong and Xiao, Jun}, title = {TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21159-21169} }
MambaVO: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shuo and Li, Wanting and Wang, Yongcai and Fan, Zhaoxin and Huang, Zhe and Cai, Xudong and Zhao, Jian and Li, Deying}, title = {MambaVO: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1252-1262} }
Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics-
[pdf]
[supp]
[bibtex]@InProceedings{Kazimi_2025_CVPR, author = {Kazimi, Tahira and Allada, Ritika and Yanardag, Pinar}, title = {Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14799-14809} }
Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Lihan and Ren, Kerui and Yu, Mulin and Xu, Linning and Dong, Junting and Lu, Tao and Zhao, Feng and Lin, Dahua and Dai, Bo}, title = {Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26789-26799} }
Attention Distillation: A Unified Approach to Visual Characteristics Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yang and Gao, Xu and Chen, Zichong and Huang, Hui}, title = {Attention Distillation: A Unified Approach to Visual Characteristics Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18270-18280} }
From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Jingxuan and Tan, Cheng and Chen, Qi and Wu, Gaowei and Li, Siyuan and Gao, Zhangyang and Sun, Linzhuang and Yu, Bihui and Guo, Ruifeng}, title = {From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13315-13325} }
LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table-
[pdf]
[supp]
[bibtex]@InProceedings{Matsui_2025_CVPR, author = {Matsui, Yusuke}, title = {LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30430-30439} }
DreamRelation: Bridging Customization and Relation Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Qingyu and Qi, Lu and Wu, Jianzong and Bai, Jinbin and Wang, Jingbo and Tong, Yunhai and Li, Xiangtai}, title = {DreamRelation: Bridging Customization and Relation Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15723-15732} }
IndoorGS: Geometric Cues Guided Gaussian Splatting for Indoor Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2025_CVPR, author = {Ruan, Cong and Wang, Yuesong and Guan, Tao and Zhang, Bin and Ju, Lili}, title = {IndoorGS: Geometric Cues Guided Gaussian Splatting for Indoor Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {844-853} }
Point-Cache: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Hongyu and Ke, Qiuhong and Cheng, Ming and Wang, Yongcai and Li, Deying and Gou, Chenhui and Cai, Jianfei}, title = {Point-Cache: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1263-1275} }
Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Yuanqi and Liu, Siao and Song, Haoming and Qu, Delin and Chen, Qizhi and Ding, Yan and Zhao, Bin and Wang, Zhigang and Li, Xuelong and Wang, Dong}, title = {Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22573-22583} }
Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent-
[pdf]
[arXiv]
[bibtex]@InProceedings{Doldo_2025_CVPR, author = {Doldo, Philip and Everett, Derek and Khanna, Amol and Nguyen, Andre T and Raff, Edward}, title = {Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6373-6382} }
MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Zhenyu and Zhou, Yuheng and Xu, Xiuwei and Wang, Ziwei and Yan, Haibin}, title = {MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1714-1723} }
SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Seokju and Chae, Seunghye and Lee, Dongheon and Ro, Youngmin}, title = {SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25602-25612} }
Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Li and Zhu, Hao and Chen, Longlong and Hu, Fei and Ye, Long and Ma, Zhan}, title = {Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11217-11226} }
TinyFusion: Diffusion Transformers Learned Shallow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Gongfan and Li, Kunjun and Ma, Xinyin and Wang, Xinchao}, title = {TinyFusion: Diffusion Transformers Learned Shallow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18144-18154} }
Ref-GS: Directional Factorization for 2D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Youjia and Chen, Anpei and Wan, Yumin and Song, Zikai and Yu, Junqing and Luo, Yawei and Yang, Wei}, title = {Ref-GS: Directional Factorization for 2D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26483-26492} }
SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Hanxiao and Gao, Yupeng and Xie, Jin and Yang, Jian and Wang, Beibei}, title = {SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16143-16152} }
Stable-SCore: A Stable Registration-based Framework for 3D Shape Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Haolin and Zhan, Xiaohang and Yan, Zizheng and Luo, Zhongjin and Wen, Yuxin and Han, Xiaoguang}, title = {Stable-SCore: A Stable Registration-based Framework for 3D Shape Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {917-928} }
Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization-
[pdf]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Kai and Wei, Ping and Lian, Yiyang and Wang, Yangyang and Zheng, Nanning}, title = {Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9964-9973} }
VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanaka_2025_CVPR, author = {Tanaka, Ryota and Iki, Taichi and Hasegawa, Taku and Nishida, Kyosuke and Saito, Kuniko and Suzuki, Jun}, title = {VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24827-24837} }
Align-KD: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement-
[pdf]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Qianhan and Li, Wenshuo and Lin, Tong and Chen, Xinghao}, title = {Align-KD: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4178-4188} }
Pose Priors from Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Subramanian_2025_CVPR, author = {Subramanian, Sanjay and Ng, Evonne and M\"uller, Lea and Klein, Dan and Ginosar, Shiry and Darrell, Trevor}, title = {Pose Priors from Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7125-7135} }
Concept Lancet: Image Editing with Compositional Representation Transplant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Jinqi and Ding, Tianjiao and Chan, Kwan Ho Ryan and Min, Hancheng and Callison-Burch, Chris and Vidal, Rene}, title = {Concept Lancet: Image Editing with Compositional Representation Transplant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28502-28512} }
Scaling Mesh Generation via Compressive Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2025_CVPR, author = {Weng, Haohan and Zhao, Zibo and Lei, Biwen and Yang, Xianghui and Liu, Jian and Lai, Zeqiang and Chen, Zhuo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Zhang, Tong and Gao, Shenghua and Chen, C.L. Philip}, title = {Scaling Mesh Generation via Compressive Tokenization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11093-11103} }
Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Seungtae and Sun, Xiangyu and Kang, Gyeongjin and Lee, Younggeun and Oh, Seungjun and Park, Eunbyung}, title = {Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26683-26693} }
LogoSP: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of 3D Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zihui and Dai, Weisheng and Wen, Hongtao and Yang, Bo}, title = {LogoSP: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of 3D Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1374-1384} }
Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Wei and Cao, Yunkang and Yao, Haiming and Zhang, Xiaotian and Lou, Jianan and Cheng, Yuqi and Shen, Weiming and Yu, Wenyong}, title = {Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9974-9983} }
Augmenting Perceptual Super-Resolution via Image Quality Predictors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Fengjia and Rangrej, Samrudhdhi B. and Aumentado-Armstrong, Tristan and Fazly, Afsaneh and Levinshtein, Alex}, title = {Augmenting Perceptual Super-Resolution via Image Quality Predictors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2311-2322} }
TurboFill: Adapting Few-step Text-to-image Model for Fast Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Liangbin and Pakhomov, Daniil and Wang, Zhonghao and Wu, Zongze and Chen, Ziyan and Zhou, Yuqian and Zheng, Haitian and Zhang, Zhifei and Lin, Zhe and Zhou, Jiantao and Dong, Chao}, title = {TurboFill: Adapting Few-step Text-to-image Model for Fast Image Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7613-7622} }
Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Jianwei and Yang, Hong and Chen, Tengyue and Hu, Jian-Fang}, title = {Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1883-1893} }
Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Chaoyou and Dai, Yuhan and Luo, Yongdong and Li, Lei and Ren, Shuhuai and Zhang, Renrui and Wang, Zihan and Zhou, Chenyu and Shen, Yunhang and Zhang, Mengdan and Chen, Peixian and Li, Yanwei and Lin, Shaohui and Zhao, Sirui and Li, Ke and Xu, Tong and Zheng, Xiawu and Chen, Enhong and Shan, Caifeng and He, Ran and Sun, Xing}, title = {Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24108-24118} }
Perception Tokens Enhance Visual Reasoning in Multimodal Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bigverdi_2025_CVPR, author = {Bigverdi, Mahtab and Luo, Zelun and Hsieh, Cheng-Yu and Shen, Ethan and Chen, Dongping and Shapiro, Linda G. and Krishna, Ranjay}, title = {Perception Tokens Enhance Visual Reasoning in Multimodal Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3836-3845} }
Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Zebin and Zhang, Xinyu and Guo, Hanzhong and Wang, Jingdong and Li, Chongxuan}, title = {Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28790-28800} }
X-Dyna: Expressive Dynamic Human Image Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Di and Xu, Hongyi and Xie, You and Gao, Yipeng and Kuang, Zhengfei and Cai, Shengqu and Zhang, Chenxu and Song, Guoxian and Wang, Chao and Shi, Yichun and Chen, Zeyuan and Zhou, Shijie and Luo, Linjie and Wetzstein, Gordon and Soleymani, Mohammad}, title = {X-Dyna: Expressive Dynamic Human Image Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5499-5509} }
Understanding Multi-layered Transmission Matrices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Levin_2025_CVPR, author = {Levin, Anat and Alterman, Marina}, title = {Understanding Multi-layered Transmission Matrices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23164-23173} }
GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Weikang and Huang, Zhaoyang and Shi, Xiaoyu and Li, Yijin and Wang, Fu-Yun and Li, Hongsheng}, title = {GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21717-21727} }
AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Kwan and Hong, Seokhyeon and Kim, Chaelin and Noh, Junyong}, title = {AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27838-27848} }
Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging-
[pdf]
[supp]
[bibtex]@InProceedings{Kahl_2025_CVPR, author = {Kahl, Max and Stricker, Sebastian and Hutschenreiter, Lisa and Bernard, Florian and Rother, Carsten and Savchynskyy, Bogdan}, title = {Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11569-11578} }
Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lun_2025_CVPR, author = {Lun, Li and Feng, Kunyu and Ni, Qinglong and Liang, Ling and Wang, Yuan and Li, Ying and Yu, Dunshan and Cui, Xiaoxin}, title = {Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3540-3551} }
Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Franchi_2025_CVPR, author = {Franchi, Gianni and Belkhir, Nacim and Trong, Dat Nguyen and Xia, Guoxuan and Pilzer, Andrea}, title = {Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8062-8072} }
PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Weicheng and Jia, Guoli and Zhang, Zhongqi and Lin, Liang and Yang, Jufeng}, title = {PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18302-18312} }
Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Shuyang and Hooi, Bryan and Liu, Jun and Chang, Kai-Wei and Huang, Zi and Cai, Yujun}, title = {Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19890-19899} }
LLaVA-ST: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hongyu and Chen, Jinyu and Wei, Ziyu and Huang, Shaofei and Hui, Tianrui and Gao, Jialin and Wei, Xiaoming and Liu, Si}, title = {LLaVA-ST: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8592-8603} }
Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Yuhao and Liu, Zuyan and Sun, Hai-Long and Yang, Jingkang and Hu, Winston and Rao, Yongming and Liu, Ziwei}, title = {Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9062-9072} }
MaIR: A Locality- and Continuity-Preserving Mamba for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Boyun and Zhao, Haiyu and Wang, Wenxin and Hu, Peng and Gou, Yuanbiao and Peng, Xi}, title = {MaIR: A Locality- and Continuity-Preserving Mamba for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7491-7501} }
Few-shot Implicit Function Generation via Equivariance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Suizhi and Yang, Xingyi and Lu, Hongtao and Wang, Xinchao}, title = {Few-shot Implicit Function Generation via Equivariance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16262-16272} }
RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Yang, Xue and Li, Yuxuan and Yang, Jian and Cheng, Ming-Ming and Li, Xiang}, title = {RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7416-7426} }
Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Qi and Ding, Hu}, title = {Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25728-25737} }
DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jing and Fu, Yihang and Chen, Falai}, title = {DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21438-21447} }
Continuous Space-Time Video Resampling with Invertible Motion Steganography-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuantong and Chen, Zhenzhong}, title = {Continuous Space-Time Video Resampling with Invertible Motion Steganography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2116-2126} }
Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Zilyu and Chen, Zhiyang and Li, Tiancheng and Huang, Zemin and Luo, Weijian and Qi, Guo-Jun}, title = {Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23412-23422} }
Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Boseung and Park, Jicheol and Kim, Sungyeon and Kwak, Suha}, title = {Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26202-26211} }
ProtoDepth: Unsupervised Continual Depth Completion with Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rim_2025_CVPR, author = {Rim, Patrick and Park, Hyoungseob and Gangopadhyay, S. and Zeng, Ziyao and Chung, Younjoon and Wong, Alex}, title = {ProtoDepth: Unsupervised Continual Depth Completion with Prototypes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6304-6316} }
vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wittmann_2025_CVPR, author = {Wittmann, Bastian and Wattenberg, Yannick and Amiranashvili, Tamaz and Shit, Suprosanna and Menze, Bjoern}, title = {vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20874-20884} }
TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hongxiang and Liu, Xingchen and Xu, Mutian and Hao, Yiming and Chen, Weikai and Han, Xiaoguang}, title = {TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27683-27693} }
NoT: Federated Unlearning via Weight Negation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khalil_2025_CVPR, author = {Khalil, Yasser H. and Brunswic, Leo and Lamghari, Soufiane and Li, Xu and Beitollahi, Mahdi and Chen, Xi}, title = {NoT: Federated Unlearning via Weight Negation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25759-25769} }
ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeonghwan and Kim, Jisoo and Na, Jeonghyeon and Joo, Hanbyul}, title = {ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1816-1828} }
Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shaar_2025_CVPR, author = {Shaar, Eitan and Shaulov, Ariel and Chechik, Gal and Wolf, Lior}, title = {Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3142-3151} }
OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hui and Xu, Mingwang and Zhan, Yun and Mu, Shan and Li, Jiaye and Cheng, Kaihui and Chen, Yuxuan and Chen, Tan and Ye, Mao and Wang, Jingdong and Zhu, Siyu}, title = {OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7752-7762} }
Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2025_CVPR, author = {Jain, Anubhav and Kobayashi, Yuya and Shibuya, Takashi and Takida, Yuhta and Memon, Nasir and Togelius, Julian and Mitsufuji, Yuki}, title = {Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12871-12879} }
Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yuzhi and Li, Chenxin and Zhang, Haitao and Lin, Zixu and Lin, Yunlong and Liu, Hengyu and Li, Wuyang and Liu, Xinyu and Gao, Jiechao and Huang, Yue and Ding, Xinghao and Yuan, Yixuan}, title = {Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8689-8699} }
RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhakal_2025_CVPR, author = {Dhakal, Aayush and Sastry, Srikumar and Khanal, Subash and Ahmad, Adeel and Xing, Eric and Jacobs, Nathan}, title = {RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24680-24689} }
Magma: A Foundation Model for Multimodal AI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jianwei and Tan, Reuben and Wu, Qianhui and Zheng, Ruijie and Peng, Baolin and Liang, Yongyuan and Gu, Yu and Cai, Mu and Ye, Seonghyeon and Jang, Joel and Deng, Yuquan and Gao, Jianfeng}, title = {Magma: A Foundation Model for Multimodal AI Agents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14203-14214} }
SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhengyuan and Cheng, Kai and Ghosh, Anindita and Bhattacharya, Uttaran and Gui, Liangyan and Bera, Aniket}, title = {SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27827-27837} }
Object-aware Sound Source Localization via Audio-Visual Scene Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Um_2025_CVPR, author = {Um, Sung Jin and Kim, Dongjin and Lee, Sangmin and Kim, Jung Uk}, title = {Object-aware Sound Source Localization via Audio-Visual Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8342-8351} }
Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zelin and Wang, Chenwei and Huang, Zhaoke and Ma, Yiming and Zhao, Cunming and Zhao, Zhongying and Yan, Hong}, title = {Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16091-16100} }
SerialGen: Personalized Image Generation by First Standardization Then Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Cong and Zou, Han and Yu, Ruiqi and Zhang, Yan and Zhan, Zhenpeng}, title = {SerialGen: Personalized Image Generation by First Standardization Then Personalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2847-2856} }
From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ziang and Zhang, Hongguang and Wang, Juan and Chen, Meihui and Hu, Hongxin and Yi, Wenzhe and Xu, Xiaoyang and Yang, Mengda and Ma, Chenjun}, title = {From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29288-29298} }
Augmented Deep Contexts for Spatially Embedded Video Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Yifan and Tang, Chuanbo and Li, Li and Liu, Dong}, title = {Augmented Deep Contexts for Spatially Embedded Video Coding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2094-2104} }
SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhenglin and Hu, Jinwei and Li, Xiangtai and He, Yiwei and Zhao, Xingyu and Peng, Bei and Wu, Baoyuan and Huang, Xiaowei and Cheng, Guangliang}, title = {SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28831-28841} }
Matrix3D: Large Photogrammetry Model All-in-One-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yuanxun and Zhang, Jingyang and Fang, Tian and Nahmias, Jean-Daniel and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao and Li, Shiwei}, title = {Matrix3D: Large Photogrammetry Model All-in-One}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11250-11263} }
Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaoqi and Xu, Jingyun and Zhang, Mingxu and Liu, Jiaming and Shen, Yan and Ponomarenko, Iaroslav and Xu, Jiahui and Heng, Liang and Huang, Siyuan and Zhang, Shanghang and Dong, Hao}, title = {Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27638-27648} }
Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ping and Wang, Lishun and Qu, Gang and Wang, Xiaodong and Zhang, Yulun and Yuan, Xin}, title = {Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {411-421} }
3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Yihang and Zhou, Shangchen and Lan, Yushi and Pan, Xingang and Loy, Chen Change}, title = {3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16430-16440} }
Investigating the Role of Weight Decay in Enhancing Nonconvex SGD-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Tao and Huang, Yuhao and Shen, Li and Xu, Kele and Wang, Bao}, title = {Investigating the Role of Weight Decay in Enhancing Nonconvex SGD}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15287-15296} }
MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morin_2025_CVPR, author = {Morin, Lucas and Weber, Valery and Nassar, Ahmed and Meijer, Gerhard Ingmar and Van Gool, Luc and Li, Yawei and Staar, Peter}, title = {MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14505-14515} }
Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuliang and Garg, Sparsh and Miangoleh, S. Mahdi H. and Huang, Xinyu and Ren, Liu}, title = {Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26996-27006} }
Image Quality Assessment: From Human to Machine Preference-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chunyi and Tian, Yuan and Ling, Xiaoyue and Zhang, Zicheng and Duan, Haodong and Wu, Haoning and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Lu, Guo and Lin, Weisi and Zhai, Guangtao}, title = {Image Quality Assessment: From Human to Machine Preference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7570-7581} }
ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kara_2025_CVPR, author = {Kara, Ozgur and Singh, Krishna Kumar and Liu, Feng and Ceylan, Duygu and Rehg, James M. and Hinz, Tobias}, title = {ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28405-28415} }
Context-Aware Multimodal Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roth_2025_CVPR, author = {Roth, Karsten and Akata, Zeynep and Damen, Dima and Balazevic, Ivana and Henaff, Olivier J.}, title = {Context-Aware Multimodal Pretraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4267-4279} }
Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Sihong and Wu, Jiaxin and Wei, Xiaoyong and Cai, Yi and Jiang, Dongmei and Wang, Yaowei}, title = {Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28942-28951} }
Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiahao and Zhang, Zikai and Hu, Rui}, title = {Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20654-20664} }
OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2025_CVPR, author = {Ouyang, Linke and Qu, Yuan and Zhou, Hongbin and Zhu, Jiawei and Zhang, Rui and Lin, Qunshu and Wang, Bin and Zhao, Zhiyuan and Jiang, Man and Zhao, Xiaomeng and Shi, Jin and Wu, Fan and Chu, Pei and Liu, Minghao and Li, Zhenxiang and Xu, Chao and Zhang, Bo and Shi, Botian and Tu, Zhongying and He, Conghui}, title = {OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24838-24848} }
LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Fan-Yun and Liu, Weiyu and Gu, Siyi and Lim, Dylan and Bhat, Goutam and Tombari, Federico and Li, Manling and Haber, Nick and Wu, Jiajun}, title = {LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29469-29478} }
Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Changshuo and He, Shuting and Fang, Xiang and Han, Jiawei and Liu, Zhonghang and Ning, Xin and Li, Weijun and Tiwari, Prayag}, title = {Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22182-22192} }
Faster Parameter-Efficient Tuning with Token Redundancy Reduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Kwonyoung and Park, Jungin and Kim, Jin and Kwon, Hyeongjun and Sohn, Kwanghoon}, title = {Faster Parameter-Efficient Tuning with Token Redundancy Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30189-30198} }
BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hui and Gao, Tingwei and Shao, Jie and Wu, Zuxuan}, title = {BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12891-12900} }
Panorama Generation From NFoV Image Done Right-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Dian and Zhang, Cheng and Wu, Xiao-Ming and Li, Cao and Lv, Chengfei and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Panorama Generation From NFoV Image Done Right}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21610-21619} }
Mamba-Adaptor: State Space Model Adaptor for Visual Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Fei and Nie, Jiahao and Tang, Yujin and Zhang, Wenkang and Zhao, Hongshen}, title = {Mamba-Adaptor: State Space Model Adaptor for Visual Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20124-20134} }
Robust Message Embedding via Attention Flow-Based Steganography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Huayuan and Zhang, Shenzhuo and Jiang, Shiqi and Liao, Jing and Gu, Shuhang and Zheng, Dejun and Wang, Changbo and Li, Chenhui}, title = {Robust Message Embedding via Attention Flow-Based Steganography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12840-12849} }
Task-driven Image Fusion with Learnable Fusion Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Wu, Yichen and Deng, Lilun and Cui, Yukun and Feng, Tao and Xu, Shuang}, title = {Task-driven Image Fusion with Learnable Fusion Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7457-7468} }
Compositional Targeted Multi-Label Universal Perturbations-
[pdf]
[supp]
[bibtex]@InProceedings{Mahmood_2025_CVPR, author = {Mahmood, Hassan and Elhamifar, Ehsan}, title = {Compositional Targeted Multi-Label Universal Perturbations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20580-20591} }
PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies-
[pdf]
[supp]
[bibtex]@InProceedings{Nafez_2025_CVPR, author = {Nafez, Mojtaba and Koochakian, Amirhossein and Maleki, Arad and Habibi, Jafar and Rohban, Mohammad Hossein}, title = {PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20383-20394} }
Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Changfeng and Bi, Ran and Guo, Jie and Wang, Chongjun and Guo, Yanwen}, title = {Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27285-27294} }
Distilling Monocular Foundation Model for Fine-grained Depth Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying}, title = {Distilling Monocular Foundation Model for Fine-grained Depth Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22254-22265} }
LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yikun and Zhang, Yajie and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Yao, Jiangchao and Wang, Yanfeng and Xie, Weidi}, title = {LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4015-4025} }
AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Stier_2025_CVPR, author = {Stier, Noah and Rich, Alex and Sen, Pradeep and H{\"o}llerer, Tobias}, title = {AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21814-21823} }
Neural Video Compression with Context Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Chuanbo and Li, Zhuoyuan and Bian, Yifan and Li, Li and Liu, Dong}, title = {Neural Video Compression with Context Modulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12553-12563} }
Less Attention is More: Prompt Transformer for Generalized Category Discovery-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wei and Zhang, Baopeng and Teng, Zhu and Luo, Wenxin and Zou, Junnan and Fan, Jianping}, title = {Less Attention is More: Prompt Transformer for Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30322-30331} }
On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hagenaars_2025_CVPR, author = {Hagenaars, Jesse J. and Wu, Yilun and Paredes-Valles, Federico and Stroobants, Stein and de Croon, Guido C.H.E.}, title = {On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17114-17123} }
CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Wei and Li, Lin and Yang, Yongqi and Wen, Bin and Yang, Fan and Gao, Tingting and Wu, Yu and Chen, Long}, title = {CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8073-8082} }
MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ruicheng and Xu, Sicheng and Dai, Cassie and Xiang, Jianfeng and Deng, Yu and Tong, Xin and Yang, Jiaolong}, title = {MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5261-5271} }
Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hongmei and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Liu, Hongying and Feng, Wei and Wan, Liang}, title = {Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9839-9848} }
ScaleLSD: Scalable Deep Line Segment Detection Streamlined-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Zeran and Tan, Bin and Zheng, Xianwei and Shen, Yujun and Wu, Tianfu and Xue, Nan}, title = {ScaleLSD: Scalable Deep Line Segment Detection Streamlined}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6327-6336} }
AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Haonan and Wu, Xiangzuo and Liao, Huan and Xu, Zunnan and Hu, Zhongyuan and Li, Ronghui and Zhang, Yachao and Li, Xiu}, title = {AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22746-22755} }
Revisiting MAE Pre-training for 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wald_2025_CVPR, author = {Wald, Tassilo and Ulrich, Constantin and Lukyanenko, Stanislav and Goncharov, Andrei and Paderno, Alberto and Miller, Maximilian and Maerkisch, Leander and Jaeger, Paul and Maier-Hein, Klaus}, title = {Revisiting MAE Pre-training for 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5186-5196} }
Learning with Noisy Triplet Correspondence for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shuxian and He, Changhao and Liu, Xiting and Zhou, Joey Tianyi and Peng, Xi and Hu, Peng}, title = {Learning with Noisy Triplet Correspondence for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19628-19637} }
DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Liao and Liu, Tianqi and Sun, Huiqiang and Li, Jiaqi and Cao, Zhiguo and Li, Wei and Loy, Chen Change}, title = {DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26462-26471} }
Parallelized Autoregressive Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuqing and Ren, Shuhuai and Lin, Zhijie and Han, Yujin and Guo, Haoyuan and Yang, Zhenheng and Zou, Difan and Feng, Jiashi and Liu, Xihui}, title = {Parallelized Autoregressive Visual Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12955-12965} }
CGMatch: A Different Perspective of Semi-supervised Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Bo and Lu, Jueqing and Tian, Yuan and Zhao, Haifeng and Chang, Yi and Du, Lan}, title = {CGMatch: A Different Perspective of Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15381-15391} }
ChatHuman: Chatting about 3D Humans with Tools-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jing and Feng, Yao and Liu, Weiyang and Black, Michael J.}, title = {ChatHuman: Chatting about 3D Humans with Tools}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8150-8161} }
FIction: 4D Future Interaction Prediction from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashutosh_2025_CVPR, author = {Ashutosh, Kumar and Pavlakos, Georgios and Grauman, Kristen}, title = {FIction: 4D Future Interaction Prediction from Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17613-17625} }
D^2iT: Dynamic Diffusion Transformer for Accurate Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Weinan and Huang, Mengqi and Chen, Nan and Zhang, Lei and Mao, Zhendong}, title = {D{\textasciicircum}2iT: Dynamic Diffusion Transformer for Accurate Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12860-12870} }
Scalable Autoregressive Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jinhong and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Chen, Danny and Chen, Jintai and Wu, Jian}, title = {Scalable Autoregressive Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6262-6272} }
Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chong and Guo, Lanqing and Fu, Zixuan and Yang, Siyuan and Cheng, Hao and Kot, Alex C. and Wen, Bihan}, title = {Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23207-23216} }
Hierarchical Flow Diffusion for Efficient Frame Interpolation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hai_2025_CVPR, author = {Hai, Yang and Wang, Guo and Su, Tan and Jiang, Wenjie and Hu, Yinlin}, title = {Hierarchical Flow Diffusion for Efficient Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22943-22952} }
Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caffagni_2025_CVPR, author = {Caffagni, Davide and Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9286-9295} }
Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering-
[pdf]
[supp]
[bibtex]@InProceedings{Das_2025_CVPR, author = {Das, Biplab and Gopalakrishnan, Viswanath}, title = {Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3603-3613} }
Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Unki and Jeong, Seongmoon and Jang, Youngchan and Park, Gyeong-Moon and Ko, Jong Hwan}, title = {Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4430-4440} }
BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Yulu and Zhang, Ce and Bertasius, Gedas}, title = {BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28952-28962} }
AniDoc: Animation Creation Made Easier-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Yihao and Ouyang, Hao and Wang, Hanlin and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Liu, Zhiheng and Shen, Yujun and Qu, Huamin}, title = {AniDoc: Animation Creation Made Easier}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18187-18197} }
DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yalong and Zhao, Lin and Gong, Chen and Li, Guangyu and Wang, Di and Wang, Nannan}, title = {DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1160-1169} }
Arbitrary-steps Image Super-resolution via Diffusion Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Zongsheng and Liao, Kang and Loy, Chen Change}, title = {Arbitrary-steps Image Super-resolution via Diffusion Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23153-23163} }
LiSu: A Dataset and Method for LiDAR Surface Normal Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Malic_2025_CVPR, author = {Mali\'c, Du\v{s}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst}, title = {LiSu: A Dataset and Method for LiDAR Surface Normal Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17039-17049} }
Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nizamani_2025_CVPR, author = {Nizamani, Awais and Laga, Hamid and Wang, Guanjin and Boussaid, Farid and Bennamoun, Mohammed and Srivastava, Anuj}, title = {Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21783-21792} }
Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuanlin and Zhang, Yiyang and Xiong, Ruiqin and Zhao, Jing and Zhang, Jian and Fan, Xiaopeng and Huang, Tiejun}, title = {Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11416-11426} }
ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Xiangyuan and Lu, Zeyu and Huang, Di and Wang, Zidong and Ouyang, Wanli and Bai, Lei}, title = {ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24614-24624} }
VideoGLaMM : A Large Multimodal Model for Pixel-Level Visual Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Munasinghe_2025_CVPR, author = {Munasinghe, Shehan and Gani, Hanan and Zhu, Wenqi and Cao, Jiale and Xing, Eric and Khan, Fahad Shahbaz and Khan, Salman}, title = {VideoGLaMM : A Large Multimodal Model for Pixel-Level Visual Grounding in Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19036-19046} }
Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Xu and Yin, Jun and Wen, Jie}, title = {Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30722-30731} }
AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiong and Zhang, Lechen and Lee, Kwansoo and Ning, Jialong and Goldfeder, Judah and Lipson, Hod}, title = {AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27628-27637} }
ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Iwase_2025_CVPR, author = {Iwase, Shun and Irshad, Muhammad Zubair and Liu, Katherine and Guizilini, Vitor and Lee, Robert and Ikeda, Takuya and Amma, Ayako and Nishiwaki, Koichi and Kitani, Kris and Ambrus, Rares and Zakharov, Sergey}, title = {ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17405-17415} }
Golden Cudgel Network for Real-Time Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Guoyu and Wang, Yuan and Shi, Daming and Wang, Yanzhong}, title = {Golden Cudgel Network for Real-Time Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25367-25376} }
PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jingyi and Wang, Le and Zhou, Sanping and Wang, Sen and Li, Jiayi and Sun, Haowen and Tang, Wei}, title = {PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15757-15767} }
VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyang and Yu, Shoubin and Stengel-Eskin, Elias and Yoon, Jaehong and Cheng, Feng and Bertasius, Gedas and Bansal, Mohit}, title = {VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3272-3283} }
Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Rao_2025_CVPR, author = {Rao, Jiahua and Lin, Hanjing and Chen, Leyu and Xie, Jiancong and Zheng, Shuangjia and Yang, Yuedong}, title = {Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30752-30762} }
R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2025_CVPR, author = {Sheng, Lijun and Liang, Jian and Wang, Zilei and He, Ran}, title = {R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29958-29967} }
Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Weiming and Dan, Jun and Wang, Fan and Liao, Xinting and Dong, Junhao and Yu, Hua and Dong, Shunjie and Qi, Lianyong}, title = {Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4927-4938} }
Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wenqiao and Zheng, Bozhong and Xu, Xiaohao and Gan, Jinye and Lu, Fading and Li, Xiang and Ni, Na and Tian, Zheng and Huang, Xiaonan and Gao, Shenghua and Wu, Yingna}, title = {Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9984-9993} }
SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Go_2025_CVPR, author = {Go, Hyojun and Park, Byeongjun and Jang, Jiho and Kim, Jin-Young and Kwon, Soonwoo and Kim, Changick}, title = {SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21524-21536} }
Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Qiao and Li, Xianzhi and Tang, Yuan and Han, Xu and Hu, Long and Hao, Yixue and Chen, Min}, title = {Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {595-604} }
Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR, author = {Shin, Suhyun and Yoon, Seungwoo and Maeda, Ryota and Baek, Seung-Hwan}, title = {Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16589-16598} }
PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Bin and Yu, Rui and Shen, Yujun and Xue, Nan}, title = {PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1190-1199} }
Omni-ID: Holistic Identity Representation Designed for Generative Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Guocheng and Wang, Kuan-Chieh and Patashnik, Or and Heravi, Negin and Ostashev, Daniil and Tulyakov, Sergey and Cohen-Or, Daniel and Aberman, Kfir}, title = {Omni-ID: Holistic Identity Representation Designed for Generative Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8786-8795} }
MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Ozsoy_2025_CVPR, author = {\"Ozsoy, Ege and Pellegrini, Chantal and Czempiel, Tobias and Tristram, Felix and Yuan, Kun and Bani-Harouni, David and Eck, Ulrich and Busam, Benjamin and Keicher, Matthias and Navab, Nassir}, title = {MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19378-19389} }
MIRE: Matched Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Jayasundara_2025_CVPR, author = {Jayasundara, Dhananjaya and Zhao, Heng and Labate, Demetrio and Patel, Vishal M.}, title = {MIRE: Matched Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8279-8288} }
AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Joo_2025_CVPR, author = {Joo, Jinho and Kim, Hyeseong and Won, Hyeyeon and Lee, Deukhee and Eo, Taejoon and Hwang, Dosik}, title = {AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5217-5226} }
Boltzmann Attention Sampling for Image Analysis with Small Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Theodore and Kiblawi, Sid and Usuyama, Naoto and Lee, Ho Hin and Preston, Sam and Poon, Hoifung and Wei, Mu}, title = {Boltzmann Attention Sampling for Image Analysis with Small Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25950-25959} }
Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Rui and Zhang, Jianfeng and Liang, Yixun and Luo, Guan and Li, Weiyu and Liu, Jiarui and Li, Xiu and Long, Xiaoxiao and Feng, Jiashi and Tan, Ping}, title = {Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16251-16261} }
Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Monroy_2025_CVPR, author = {Monroy, Brayan and Bacca, Jorge and Tachella, Juli\'an}, title = {Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28155-28164} }
Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chong and Chen, Tao and Gan, Zhongxue}, title = {Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14712-14722} }
Dynamic Motion Blending for Versatile Motion Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Nan and Li, Hongjie and Yuan, Ziye and He, Zimo and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan}, title = {Dynamic Motion Blending for Versatile Motion Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22735-22745} }
StdGEN: Semantic-Decomposed 3D Character Generation from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Yuze and Zhou, Yanning and Zhao, Wang and Wu, Zhongkai and Xiao, Kaiwen and Yang, Wei and Liu, Yong-Jin and Han, Xiao}, title = {StdGEN: Semantic-Decomposed 3D Character Generation from Single Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26345-26355} }
Reconstructing Animals and the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulits_2025_CVPR, author = {Kulits, Peter and Black, Michael J. and Zuffi, Silvia}, title = {Reconstructing Animals and the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16565-16577} }
RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability-
[pdf]
[bibtex]@InProceedings{Do_2025_CVPR, author = {Do, Minh Kha and Han, Kang and Lai, Phu and Phan, Khoa T. and Xiang, Wei}, title = {RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7427-7436} }
Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jingyi and Chen, Xieyuanli and Ma, Junyi and Huang, Jiawei and Xu, Jintao and Wang, Yue and Pei, Ling}, title = {Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22338-22347} }
DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Bencheng and Chen, Shaoyu and Yin, Haoran and Jiang, Bo and Wang, Cheng and Yan, Sixu and Zhang, Xinbang and Li, Xiangyu and Zhang, Ying and Zhang, Qian and Wang, Xinggang}, title = {DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12037-12047} }
MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Xiaohao and Xue, Feng and Zhao, Shibo and Pan, Yike and Scherer, Sebastian and Huang, Xiaonan}, title = {MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {854-863} }
FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin}, title = {FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30261-30270} }
DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Caoshuo and Li, Tanzhe and Hu, Xiaobin and Luo, Donghao and Jin, Taisong}, title = {DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20158-20168} }
Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Shu_2025_CVPR, author = {Shu, Yan and Liu, Zheng and Zhang, Peitian and Qin, Minghao and Zhou, Junjie and Liang, Zhengyang and Huang, Tiejun and Zhao, Bo}, title = {Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26160-26169} }
Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Boran and Huang, Dingbang and Zhang, Zichen and Zhou, Jiahong and Deng, Jianbin and Gong, Jingyu and Chen, Yulong and Ma, Lizhuang and Li, Yong-Lu}, title = {Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17426-17436} }
GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Jieming and Liu, Tengyu and Meng, Ziyu and Yu, Jiale and Song, Ran and Zhang, Wei and Zhu, Yixin and Huang, Siyuan}, title = {GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15781-15790} }
Sonata: Self-Supervised Learning of Reliable Point Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Xiaoyang and DeTone, Daniel and Frost, Duncan and Shen, Tianwei and Xie, Chris and Yang, Nan and Engel, Jakob and Newcombe, Richard and Zhao, Hengshuang and Straub, Julian}, title = {Sonata: Self-Supervised Learning of Reliable Point Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22193-22204} }
DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Hongbin and Guo, Zilu and Zhang, Yifan and Niu, Shuaicheng and Li, Yafeng and Zhang, Ruimao and Cui, Shuguang and Li, Zhen}, title = {DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27497-27507} }
GauSTAR: Gaussian Surface Tracking and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Chengwei and Xue, Lixin and Zarate, Juan and Song, Jie}, title = {GauSTAR: Gaussian Surface Tracking and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16543-16553} }
Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zixuan and Peng, Duo and Chen, Feng and Yang, Yuwei and Lei, Yinjie}, title = {Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13135-13145} }
DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Mingze and Chen, Junhao and Dong, Junting and Chen, Yurun and Jiang, Xinyu and Mao, Shiwei and Jiang, Puhua and Wang, Jingbo and Dai, Bo and Huang, Ruqi}, title = {DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21170-21180} }
Online Video Understanding: OVBench and VideoChat-Online-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhenpeng and Li, Xinhao and Li, Jiaqi and Wang, Jing and Zeng, Xiangyu and Liang, Cheng and Wu, Tao and Chen, Xi and Li, Liang and Wang, Limin}, title = {Online Video Understanding: OVBench and VideoChat-Online}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3328-3338} }
TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baek_2025_CVPR, author = {Baek, Seungmin and Lee, Soyul and Jo, Hayeon and Choi, Hyesong and Min, Dongbo}, title = {TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14858-14868} }
A Unified Approach to Interpreting Self-supervised Pre-training Methods for 3D Point Clouds via Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Qiang and Ruan, Jian and Wu, Fanghao and Chen, Yuchi and Wei, Zhihua and Shen, Wen}, title = {A Unified Approach to Interpreting Self-supervised Pre-training Methods for 3D Point Clouds via Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27315-27324} }
Enhancing SAM with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Konwer_2025_CVPR, author = {Konwer, Aishik and Yang, Zhijian and Bas, Erhan and Xiao, Cao and Prasanna, Prateek and Bhatia, Parminder and Kass-Hout, Taha}, title = {Enhancing SAM with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20990-21000} }
CamPoint: Boosting Point Cloud Segmentation with Virtual Camera-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianhui and Luo, Yizhi and Zhang, Zicheng and Nie, Xuecheng and Li, Bonan}, title = {CamPoint: Boosting Point Cloud Segmentation with Virtual Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11822-11832} }
LightLoc: Learning Outdoor LiDAR Localization at Light Speed-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wen and Liu, Chen and Yu, Shangshu and Liu, Dunqiang and Zhou, Yin and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {LightLoc: Learning Outdoor LiDAR Localization at Light Speed}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6680-6689} }
MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganguly_2025_CVPR, author = {Ganguly, Aniruddha and Chatterjee, Debolina and Huang, Wentao and Zhang, Jie and Yurovsky, Alisa and Johnson, Travis Steele and Chen, Chao}, title = {MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15611-15620} }
Accurate Differential Operators for Hybrid Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chetan_2025_CVPR, author = {Chetan, Aditya and Yang, Guandao and Wang, Zichen and Marschner, Steve and Hariharan, Bharath}, title = {Accurate Differential Operators for Hybrid Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {530-539} }
FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Fengyi and Zhang, Lei and Huang, Mengqi and Mao, Zhendong}, title = {FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2661-2670} }
Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Dongseob and Shim, Hyunjung}, title = {Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4661-4671} }
UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Huakun and Ota, Hiroki and Wei, Xin and Hirao, Yutaro and Perusquia-Hernandez, Monica and Uchiyama, Hideaki and Kiyokawa, Kiyoshi}, title = {UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7085-7094} }
STEREO: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Srivatsan_2025_CVPR, author = {Srivatsan, Koushik and Shamshad, Fahad and Naseer, Muzammal and Patel, Vishal M. and Nandakumar, Karthik}, title = {STEREO: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23765-23774} }
Scene Map-based Prompt Tuning for Navigation Instruction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Sheng and Liu, Rui and Wang, Wenguan and Yang, Yi}, title = {Scene Map-based Prompt Tuning for Navigation Instruction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6898-6908} }
GenVDM: Generating Vector Displacement Maps From a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuezhi and Chen, Qimin and Kim, Vladimir G. and Chaudhuri, Siddhartha and Huang, Qixing and Chen, Zhiqin}, title = {GenVDM: Generating Vector Displacement Maps From a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26618-26629} }
DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yexing and Wang, Longguang and Chen, Minglin and Ao, Sheng and Li, Li and Guo, Yulan}, title = {DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {701-710} }
Effective SAM Combination for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Minhyeok and Cho, Suhwan and Lee, Jungho and Yang, Sunghun and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun}, title = {Effective SAM Combination for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26081-26090} }
Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Zhang, Yanzhao and Xie, Wen and Li, Mingxin and Dai, Ziqi and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Li, Wenjie and Zhang, Min}, title = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9274-9285} }
Enhancing Dataset Distillation via Non-Critical Region Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_CVPR, author = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Do, Thanh-Toan and Phung, Dinh}, title = {Enhancing Dataset Distillation via Non-Critical Region Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10015-10024} }
Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wenqiao and Gu, Yao and Chen, Xintao and Xu, Xiaohao and Hu, Ming and Huang, Xiaonan and Wu, Yingna}, title = {Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30409-30419} }
SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yongting and Chen, Lu and Zheng, Guodong and Gao, Yifeng and Zheng, Rui and Fu, Jinlan and Yin, Zhenfei and Jin, Senjie and Qiao, Yu and Huang, Xuanjing and Zhao, Feng and Gui, Tao and Shao, Jing}, title = {SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19867-19878} }
PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Hanson_2025_CVPR, author = {Hanson, Alex and Tu, Allen and Singla, Vasu and Jayawardhana, Mayuka and Zwicker, Matthias and Goldstein, Tom}, title = {PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5949-5958} }
UniAP: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Hao and Wu, Ke and Li, Jie and Li, Jun and Li, Wu-Jun}, title = {UniAP: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20947-20957} }
PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Xiang and Yang, Shuai and Liu, Jiaying}, title = {PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18240-18249} }
ScribbleLight: Single Image Indoor Relighting with Scribbles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Jun Myeong and Wang, Annie and Peers, Pieter and Bhattad, Anand and Sengupta, Roni}, title = {ScribbleLight: Single Image Indoor Relighting with Scribbles}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5720-5731} }
Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Vuong_2025_CVPR, author = {Vuong, Tung-Long and Phan, Hoang and Vo, Vy and Bui, Anh and Do, Thanh-Toan and Le, Trung and Phung, Dinh}, title = {Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19974-19984} }
InsightEdit: Towards Better Instruction Following for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yingjing and Kong, Jie and Wang, Jiazhi and Pan, Xiao and Lin, Bo and Liu, Qiang}, title = {InsightEdit: Towards Better Instruction Following for Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2694-2703} }
Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Haipeng and Tang, Sheng and Cao, Juan and Zhang, Enshuo and Tang, Fan and Lee, Tong-Yee}, title = {Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18083-18092} }
Turbo3D: Ultra-fast Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Hanzhe and Yin, Tianwei and Luan, Fujun and Hu, Yiwei and Tan, Hao and Xu, Zexiang and Bi, Sai and Tulsiani, Shubham and Zhang, Kai}, title = {Turbo3D: Ultra-fast Text-to-3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23668-23678} }
SUM Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Weixiao and Nan, Liangliang and Ledoux, Hugo}, title = {SUM Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24474-24484} }
One-for-More: Continual Diffusion Model for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaofan and Tan, Xin and Chen, Zhuo and Zhang, Zhizhong and Zhang, Ruixin and Guo, Rizen and Jiang, Guanna and Chen, Yulong and Qu, Yanyun and Ma, Lizhuang and Xie, Yuan}, title = {One-for-More: Continual Diffusion Model for Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4766-4775} }
MODA: Motion-Drift Augmentation for Inertial Human Motion Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yinghao and Guo, Shihui and Qin, Yipeng}, title = {MODA: Motion-Drift Augmentation for Inertial Human Motion Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27771-27781} }
Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Roetzer_2025_CVPR, author = {Roetzer, Paul and Ehm, Viktoria and Cremers, Daniel and L\"ahner, Zorah and Bernard, Florian}, title = {Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21793-21803} }
Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks-
[pdf]
[supp]
[bibtex]@InProceedings{Heo_2025_CVPR, author = {Heo, Miran and Chen, Min-Hung and Huang, De-An and Liu, Sifei and Radhakrishnan, Subhashree and Kim, Seon Joo and Wang, Yu-Chiang Frank and Hachiuma, Ryo}, title = {Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3919-3930} }
Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Luke and Wang, Junyao and Mortlock, Trier and Khargonekar, Pramod and Al Faruque, Mohammad Abdullah}, title = {Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22306-22316} }
EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Jeong, Somi and Lee, Taejae and Manocha, Dinesh and Yeon, Suyong}, title = {EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6337-6347} }
EZSR: Event-based Zero-Shot Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yan and Pan, Liyuan and Li, Dongxu and Liu, Liu}, title = {EZSR: Event-based Zero-Shot Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4628-4638} }
FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Sen and Wang, Le and Zhou, Sanping and Tian, Jingyi and Li, Jiayi and Sun, Haowen and Tang, Wei}, title = {FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12176-12186} }
Visual Lexicon: Rich Image Features in Language Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, XuDong and Zhou, Xingyi and Fathi, Alireza and Darrell, Trevor and Schmid, Cordelia}, title = {Visual Lexicon: Rich Image Features in Language Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19736-19747} }
SVFR: A Unified Framework for Generalized Video Face Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhiyao and Chen, Xu and Xu, Chengming and Zhu, Junwei and Hu, Xiaobin and Zhang, Jiangning and Wang, Chengjie and Liu, Yuqi and Zhou, Yiyi and Ji, Rongrong}, title = {SVFR: A Unified Framework for Generalized Video Face Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7406-7415} }
Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Huan and Han, Wencheng and Shen, Jianbing}, title = {Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {951-960} }
Test-Time Visual In-Context Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Jiahao and Tonioni, Alessio and Rauschmayr, Nathalie and Tombari, Federico and Schiele, Bernt}, title = {Test-Time Visual In-Context Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19996-20005} }
Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Hao and Zeng, Yiming and Bi, Zetong and Wan, Zhaoliang and Huang, Junlong and Cheng, Hui}, title = {Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12100-12110} }
Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Zhao and Chen, Ka and Lv, Zhaoyang and Yu, Hong-Xing and Zhang, Yunzhi and Zhang, Cheng and Zhu, Yufeng and Tian, Stephen and Li, Zhengqin and Moffatt, Geordie and Christofferson, Sean and Fort, James and Pan, Xiaqing and Yan, Mingfei and Wu, Jiajun and Ren, Carl Yuheng and Newcombe, Richard}, title = {Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {753-763} }
SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Kaiyu and Liu, Ruixun and Cao, Xiangyong and Bai, Xueru and Zhou, Feng and Meng, Deyu and Wang, Zhi}, title = {SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10545-10556} }
MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zilong and Wang, Yikai and Sun, Wenqiang and Wang, Feng and Chen, Yiwen and Liu, Huaping}, title = {MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5835-5848} }
GIFStream: 4D Gaussian-based Immersive Video with Feature Stream-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hao and Li, Sicheng and Gao, Xiang and Batuer, Abudouaihati and Yu, Lu and Liao, Yiyi}, title = {GIFStream: 4D Gaussian-based Immersive Video with Feature Stream}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21761-21770} }
DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Hyeongjin and Kim, Donghwan and Oh, Jeongtaek and Lee, Kyoung Mu}, title = {DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5636-5645} }
Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yang and Guo, Jingcai and Guo, Song and Tao, Dacheng}, title = {Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8721-8730} }
Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in LiDAR Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelsamad_2025_CVPR, author = {Abdelsamad, Mohamed and Ulrich, Michael and Glaeser, Claudius and Valada, Abhinav}, title = {Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in LiDAR Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22234-22243} }
Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yanbo and Guan, Jiyang and Liang, Jian and He, Ran}, title = {Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19879-19889} }
High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Mingtao and Xing, Guanyu and Liu, Yanli}, title = {High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {228-238} }
Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xueyu and Wang, Rui and Lai, Yexin and Shi, Guangze and Shao, Feixue and Hao, Fang and Zhang, Jianan and Shen, Jia and Wu, Yongfei and Zheng, Wen}, title = {Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4332-4342} }
Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Long and Wang, Jiakai and Hao, Haojie and Qin, Haotong and Zhao, Jiejie and Liu, Xianglong}, title = {Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13414-13423} }
EchoONE: Segmenting Multiple Echocardiography Planes in One Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Jiongtong and Xue, Wufeng and Cheng, Jun and Liu, Yingying and Zhuo, Wei and Ni, Dong}, title = {EchoONE: Segmenting Multiple Echocardiography Planes in One Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5207-5216} }
Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kendong and Zhu, Zhiyu and Liu, Hui and Hou, Junhui}, title = {Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18031-18040} }
EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yumeng and Long, Xiaoxiao and Yang, Zemin and Liu, Yuan and Habermann, Marc and Theobalt, Christian and Ma, Yuexin and Wang, Wenping}, title = {EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7037-7047} }
PLeaS - Merging Models with Permutations and Least Squares-
[pdf]
[supp]
[bibtex]@InProceedings{Nasery_2025_CVPR, author = {Nasery, Anshul and Hayase, Jonathan and Koh, Pang Wei and Oh, Sewoong}, title = {PLeaS - Merging Models with Permutations and Least Squares}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30493-30502} }
Incremental Object Keypoint Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Mingfu and Zhou, Jiahuan and Zou, Xu and Wu, Ying}, title = {Incremental Object Keypoint Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25399-25410} }
Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhongwen and Boykov, Yuri}, title = {Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20244-20253} }
MVSAnywhere: Zero-Shot Multi-View Stereo-
[pdf]
[arXiv]
[bibtex]@InProceedings{Izquierdo_2025_CVPR, author = {Izquierdo, Sergio and Sayed, Mohamed and Firman, Michael and Garcia-Hernando, Guillermo and Turmukhambetov, Daniyar and Civera, Javier and Mac Aodha, Oisin and Brostow, Gabriel and Watson, Jamie}, title = {MVSAnywhere: Zero-Shot Multi-View Stereo}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11493-11504} }
InteractVLM: 3D Interaction Reasoning from 2D Foundational Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dwivedi_2025_CVPR, author = {Dwivedi, Sai Kumar and Anti\'c, Dimitrije and Tripathi, Shashank and Taheri, Omid and Schmid, Cordelia and Black, Michael J. and Tzionas, Dimitrios}, title = {InteractVLM: 3D Interaction Reasoning from 2D Foundational Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22605-22615} }
Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Ruotian and He, Haiying and Wei, Yake and Wen, Yandong and Hu, Di}, title = {Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3963-3973} }
Attribute-Missing Multi-view Graph Clustering-
[pdf]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Bowen and Wang, Qianqian and Ding, Zhengming and Gao, Quanxue}, title = {Attribute-Missing Multi-view Graph Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25832-25841} }
Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Yoshida_2025_CVPR, author = {Yoshida, Tomoya and Kurita, Shuhei and Nishimura, Taichi and Mori, Shinsuke}, title = {Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17370-17382} }
Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Kaixin and Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Liao, Jianxin}, title = {Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22627-22637} }
ReDiffDet: Rotation-equivariant Diffusion Model for Oriented Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jiaqi and Ding, Zeyu and Zhou, Yong and Zhu, Hancheng and Du, Wen-Liang and Yao, Rui}, title = {ReDiffDet: Rotation-equivariant Diffusion Model for Oriented Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24429-24439} }
PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsu_2025_CVPR, author = {Hsu, HsiaoYuan and Peng, Yuxin}, title = {PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8117-8127} }
BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lozano_2025_CVPR, author = {Lozano, Alejandro and Sun, Min Woo and Burgess, James and Chen, Liangyu and Nirschl, Jeffrey J. and Gu, Jeffrey and Lopez, Ivan and Aklilu, Josiah and Rau, Anita and Katzer, Austin Wolfgang and Zhang, Yuhui and Chiu, Collin and Wang, Xiaohan and Song, Alfred Seunghoon and Tibshirani, Robert and Yeung-Levy, Serena}, title = {BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19724-19735} }
Unlocking Generalization Power in LiDAR Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Zhenxuan and Wu, Qiao and Zhang, Xiyu and Wu, Lin Yuanbo and An, Pei and Yang, Jiaqi and Wang, Ji and Wang, Peng}, title = {Unlocking Generalization Power in LiDAR Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22244-22253} }
Structure-Aware Correspondence Learning for Relative Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yihan and Yang, Wenfei and Ren, Huan and Zhang, Shifeng and Zhang, Tianzhu and Wu, Feng}, title = {Structure-Aware Correspondence Learning for Relative Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11611-11621} }
LoRA Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned LoRAs-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Zixuan and Wei, Yongxian and Shen, Li and Yuan, Chun and Tao, Dacheng}, title = {LoRA Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned LoRAs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25026-25037} }
One2Any: One-Reference 6D Pose Estimation for Any Object-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mengya and Li, Siyuan and Chhatkuli, Ajad and Truong, Prune and Van Gool, Luc and Tombari, Federico}, title = {One2Any: One-Reference 6D Pose Estimation for Any Object}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6457-6467} }
PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stekovic_2025_CVPR, author = {Stekovic, Sinisa and Artykov, Arslan and Ainetter, Stefan and D'Urso, Mattia and Fraundorfer, Friedrich}, title = {PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16283-16292} }
Contextual AD Narration with Interleaved Multimodal Sequence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanlin and Tong, Zhan and Zheng, Kecheng and Shen, Yujun and Wang, Limin}, title = {Contextual AD Narration with Interleaved Multimodal Sequence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8372-8383} }
FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Daosong and Cui, Mingyue and Huang, Kai}, title = {FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18760-18769} }
MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Tianchen and Shen, Guole and Xun, Chen and Yuan, Shenghai and Jin, Tongxin and Shen, Hongming and Wang, Yanbo and Wang, Jingchuan and Wang, Hesheng and Wang, Danwei and Chen, Weidong}, title = {MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1485-1494} }
TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Chun and Wei, Xiaofei and Zhang, Li and Zhu, Xiatian}, title = {TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {495-504} }
FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chengyue and Maneechotesuwan, Brisa and Chopra, Shivang and Kira, Zsolt}, title = {FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3909-3918} }
Shape Abstraction via Marching Differentiable Support Functions-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Sunkyung and Lee, Jeongmin and Lee, Dongjun}, title = {Shape Abstraction via Marching Differentiable Support Functions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16902-16911} }
LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhi_2025_CVPR, author = {Zhi, Hongyan and Chen, Peihao and Li, Junyan and Ma, Shuailei and Sun, Xinyu and Xiang, Tianhang and Lei, Yinjie and Tan, Mingkui and Gan, Chuang}, title = {LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3761-3771} }
Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Ziyuan and Zou, Zihao and Boominathan, Vivek and Chakravarthula, Praneeth and Pediredla, Adithya}, title = {Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26910-26920} }
HyperFree: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jingtao and Liu, Yingyi and Wang, Xinyu and Peng, Yunning and Sun, Chen and Wang, Shaoyu and Sun, Zhendong and Ke, Tian and Jiang, Xiao and Lu, Tangwei and Zhao, Anran and Zhong, Yanfei}, title = {HyperFree: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23048-23058} }
Exploring Temporally-Aware Features for Point Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Huang, Jiahui and Yi, Jung and Lee, Joon-Young and Kim, Seungryong}, title = {Exploring Temporally-Aware Features for Point Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1962-1972} }
GBlobs: Explicit Local Structure via Gaussian Blobs for Improved Cross-Domain LiDAR-based 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Malic_2025_CVPR, author = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst}, title = {GBlobs: Explicit Local Structure via Gaussian Blobs for Improved Cross-Domain LiDAR-based 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27357-27367} }
V^2Dial: Unification of Video and Visual Dialog via Multimodal Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Abdessaied_2025_CVPR, author = {Abdessaied, Adnen and Rohrbach, Anna and Rohrbach, Marcus and Bulling, Andreas}, title = {V{\textasciicircum}2Dial: Unification of Video and Visual Dialog via Multimodal Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8637-8647} }
Detail-Preserving Latent Diffusion for Stable Shadow Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiamin and Zheng, Yuxin and Li, Zelong and Wang, Chi and Gu, Renshu and Xu, Weiwei and Xu, Gang}, title = {Detail-Preserving Latent Diffusion for Stable Shadow Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7592-7602} }
Scaling Down Text Encoders of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lifu and Liu, Daqing and Liu, Xinchen and He, Xiaodong}, title = {Scaling Down Text Encoders of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18424-18433} }
3D Gaussian Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yating and Wang, Xuan and Yi, Ran and Fan, Yanbo and Hu, Jichen and Zhu, Jingcheng and Ma, Lizhuang}, title = {3D Gaussian Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21117-21126} }
MambaIRv2: Attentive State Space Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Hang and Guo, Yong and Zha, Yaohua and Zhang, Yulun and Li, Wenbo and Dai, Tao and Xia, Shu-Tao and Li, Yawei}, title = {MambaIRv2: Attentive State Space Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28124-28133} }
Floating No More: Object-Ground Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2025_CVPR, author = {Man, Yunze and Sheng, Yichen and Zhang, Jianming and Gui, Liang-Yan and Wang, Yu-Xiong}, title = {Floating No More: Object-Ground Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27134-27143} }
POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin}, title = {POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15055-15064} }
CrossOver: 3D Scene Cross-Modal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarkar_2025_CVPR, author = {Sarkar, Sayan Deb and Miksik, Ondrej and Pollefeys, Marc and Barath, Daniel and Armeni, Iro}, title = {CrossOver: 3D Scene Cross-Modal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8985-8994} }
Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Dubing and Zheng, Huan and Fang, Jin and Dong, Xingping and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing}, title = {Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1505-1515} }
SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Junsheng and Cao, Nieqing and Ding, Yan and Xie, Mengying and Gu, Fuqiang and Chen, Chao}, title = {SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19414-19423} }
Gaussian Eigen Models for Human Heads-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zielonka_2025_CVPR, author = {Zielonka, Wojciech and Bolkart, Timo and Beeler, Thabo and Thies, Justus}, title = {Gaussian Eigen Models for Human Heads}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15930-15940} }
Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Yunseok and Song, Yeda and Sohn, Sungryull and Logeswaran, Lajanugen and Luo, Tiange and Kim, Dong-Ki and Bae, Kyunghoon and Lee, Honglak}, title = {Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8604-8614} }
Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganeshan_2025_CVPR, author = {Ganeshan, Aditya and Groueix, Thibault and Guerrero, Paul and Mech, Radomir and Fisher, Matthew and Ritchie, Daniel}, title = {Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28715-28725} }
4D-Fly: Fast 4D Reconstruction from a Single Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Diankun and Liu, Fangfu and Hung, Yi-Hsin and Qian, Yue and Zhan, Xiaohang and Duan, Yueqi}, title = {4D-Fly: Fast 4D Reconstruction from a Single Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16663-16673} }
STAR-Edge: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zikuan and Chen, Honghua and Wang, Yuecheng and Wu, Sibo and Wei, Mingqiang and Wang, Jun}, title = {STAR-Edge: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27254-27263} }
Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jiuchen and Yan, Xinyu and Xu, Qizhi and Li, Kaiqi}, title = {Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2258-2268} }
Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuchen and Wang, Hongyuan and Wang, Lizhi and Wang, Xin and Zhu, Lin and Lu, Wanxuan and Huang, Hua}, title = {Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12679-12689} }
Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duggal_2025_CVPR, author = {Duggal, Shivam and Hu, Yushi and Michel, Oscar and Kembhavi, Aniruddha and Freeman, William T. and Smith, Noah A. and Krishna, Ranjay and Torralba, Antonio and Farhadi, Ali and Ma, Wei-Chiu}, title = {Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13326-13336} }
Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation-
[pdf]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Rong and Liu, Xingyu and Shi, Jinglei and Lin, Liang and Yang, Jufeng}, title = {Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25960-25970} }
DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yongshu and Liu, Chen and Zhu, Minghang and Ao, Sheng and Wen, Chenglu and Wang, Cheng}, title = {DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17050-17059} }
pFedMxF: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yifei and Zhu, Hao and Tan, Alysa Ziying and Yu, Dianzhi and Huang, Longtao and Yu, Han}, title = {pFedMxF: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30640-30650} }
Style-Editor: Text-driven Object-centric Style Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jihun and Gim, Jongmin and Lee, Kyoungmin and Lee, Seunghun and Im, Sunghoon}, title = {Style-Editor: Text-driven Object-centric Style Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18281-18291} }
Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Tai-Yu and Jeon, Sooyoung and Fan, Mengdi and Yoo, Jinsu and Feng, Zhenyang and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun}, title = {Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12027-12036} }
Efficient Transfer Learning for Video-language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Haoxing and Huang, Zizheng and Hong, Yan and Wang, Yanshuo and Lyu, Zhongcai and Xu, Zhuoer and Lan, Jun and Gu, Zhangxuan}, title = {Efficient Transfer Learning for Video-language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29129-29138} }
Radio Frequency Ray Tracing with Neural Object Representation for Enhanced RF Modeling-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xingyu and Feng, Zihao and Qian, Kun and Zhang, Xinyu}, title = {Radio Frequency Ray Tracing with Neural Object Representation for Enhanced RF Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21339-21348} }
ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Yuejiao and Wang, Yi and Hu, Qiongyang and Yang, Chuang and Chau, Lap-Pui}, title = {ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9027-9038} }
MET3R: Measuring Multi-View Consistency in Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asim_2025_CVPR, author = {Asim, Mohammad and Wewer, Christopher and Wimmer, Thomas and Schiele, Bernt and Lenssen, Jan Eric}, title = {MET3R: Measuring Multi-View Consistency in Generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6034-6044} }
Segmenting Maxillofacial Structures in CBCT Volumes-
[pdf]
[bibtex]@InProceedings{Bolelli_2025_CVPR, author = {Bolelli, Federico and Marchesini, Kevin and van Nistelrooij, Niels and Lumetti, Luca and Pipoli, Vittorio and Ficarra, Elisa and Vinayahalingam, Shankeeth and Grana, Costantino}, title = {Segmenting Maxillofacial Structures in CBCT Volumes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5238-5248} }
3D Dental Model Segmentation with Geometrical Boundary Preserving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xi_2025_CVPR, author = {Xi, Shufan and Liu, Zexian and Chang, Junlin and Wu, Hongyu and Wang, Xiaogang and Hao, Aimin}, title = {3D Dental Model Segmentation with Geometrical Boundary Preserving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10476-10485} }
Neuro-3D: Towards 3D Visual Decoding from EEG Signals-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zhanqiang and Wu, Jiamin and Song, Yonghao and Bu, Jiahui and Mai, Weijian and Zheng, Qihao and Ouyang, Wanli and Song, Chunfeng}, title = {Neuro-3D: Towards 3D Visual Decoding from EEG Signals}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23870-23880} }
FastVLM: Efficient Vision Encoding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2025_CVPR, author = {Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Li, Chun-Liang and Koc, Cem and True, Nate and Antony, Albert and Santhanam, Gokula and Gabriel, James and Grasch, Peter and Tuzel, Oncel and Pouransari, Hadi}, title = {FastVLM: Efficient Vision Encoding for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19769-19780} }
VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Yufan and Guo, Pengfei and Tang, Yucheng and Myronenko, Andriy and Nath, Vishwesh and Xu, Ziyue and Yang, Dong and Zhao, Can and Simon, Benjamin and Belue, Mason and Harmon, Stephanie and Turkbey, Baris and Xu, Daguang and Li, Wenqi}, title = {VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20863-20873} }
VideoGigaGAN: Towards Detail-rich Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yiran and Park, Taesung and Zhang, Richard and Zhou, Yang and Shechtman, Eli and Liu, Feng and Huang, Jia-Bin and Liu, Difan}, title = {VideoGigaGAN: Towards Detail-rich Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2139-2149} }
Probing the Mid-level Vision Capabilities of Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xuweiyi and Marks, Markus and Cheng, Zezhou}, title = {Probing the Mid-level Vision Capabilities of Self-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30095-30105} }
S2D-LFE: Sparse-to-Dense Light Field Event Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yutong and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei}, title = {S2D-LFE: Sparse-to-Dense Light Field Event Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11207-11216} }
The Art of Deception: Color Visual Illusions and Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gomez-Villa_2025_CVPR, author = {Gomez-Villa, Alexandra and Wang, Kai and Parraga, C. Alejandro and Twardowski, Bart{\l}omiej and Malo, Jesus and Vazquez-Corral, Javier and van den Weijer, Joost}, title = {The Art of Deception: Color Visual Illusions and Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18642-18652} }
GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Lang and Yu, Xueyang and Pang, Ziqi and Wang, Yu-Xiong}, title = {GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8658-8667} }
Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Zhiyuan and Liang, Xinyue and Wu, Rongyuan and Zhu, Xiangyu and Lei, Zhen and Zhang, Lei}, title = {Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11036-11050} }
Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Huiwon and Yu, Sihyun and Shin, Jinwoo and Abbeel, Pieter and Seo, Younggyo}, title = {Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22853-22863} }
Derivative-Free Diffusion Manifold-Constrained Gradient for Unified XAI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Won Jun and Chung, Hyungjin and Kim, Jaemin and Lee, Sangmin and Sim, Byeongsu and Ye, Jong Chul}, title = {Derivative-Free Diffusion Manifold-Constrained Gradient for Unified XAI}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23795-23805} }
ZoomLDM: Latent Diffusion Model for Multi-scale Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yellapragada_2025_CVPR, author = {Yellapragada, Srikar and Graikos, Alexandros and Triaridis, Kostas and Prasanna, Prateek and Gupta, Rajarsi and Saltz, Joel and Samaras, Dimitris}, title = {ZoomLDM: Latent Diffusion Model for Multi-scale Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23453-23463} }
Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Yancheng and Yin, Fei and Hammou, Dounia and Mantiuk, Rafal}, title = {Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20039-20048} }
GaussianUDF: Inferring Unsigned Distance Functions through 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shujuan and Liu, Yu-Shen and Han, Zhizhong}, title = {GaussianUDF: Inferring Unsigned Distance Functions through 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27113-27123} }
Towards RAW Object Detection in Diverse Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhong-Yu and Jin, Xin and Sun, Bo-Yuan and Guo, Chun-Le and Cheng, Ming-Ming}, title = {Towards RAW Object Detection in Diverse Conditions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8859-8868} }
FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Anjia and Wei, Xing and Ma, Zhiheng}, title = {FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4080-4090} }
CrossSDF: 3D Reconstruction of Thin Structures From Cross-Sections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Walker_2025_CVPR, author = {Walker, Thomas and Esposito, Salvatore and Rebain, Daniel and Vaxman, Amir and Onken, Arno and Li, Changjian and Mac Aodha, Oisin}, title = {CrossSDF: 3D Reconstruction of Thin Structures From Cross-Sections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30928-30937} }
DV-Matcher: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhangquan and Jiang, Puhua and Huang, Ruqi}, title = {DV-Matcher: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27264-27274} }
Reasoning Mamba: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuxuan and Wu, Aming and Yang, Muli and Min, Yukuan and Zhu, Yihang and Deng, Cheng}, title = {Reasoning Mamba: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27618-27627} }
Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fukuda_2025_CVPR, author = {Fukuda, Takuma and Kera, Hiroshi and Kawamoto, Kazuhiko}, title = {Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4884-4893} }
OpenSDI: Spotting Diffusion-Generated Images in the Open World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yabin and Huang, Zhiwu and Hong, Xiaopeng}, title = {OpenSDI: Spotting Diffusion-Generated Images in the Open World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4291-4301} }
Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Qiyuan and Huang, Hanzhuo and Wu, Yu and Yang, Sibei}, title = {Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25444-25453} }
Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Fei and Bors, Adrian G.}, title = {Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20512-20522} }
Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Yiqun and He, Mingming and Ma, Li and Philip, Julien and Xian, Wenqi and George, David M and Yu, Xueming and Dedic, Gabriel and Ta\c{s}el, Ahmet Levent and Yu, Ning and Patel, Vishal M. and Debevec, Paul}, title = {Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5510-5522} }
DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Lianghui and Huang, Zilong and Liao, Bencheng and Liew, Jun Hao and Yan, Hanshu and Feng, Jiashi and Wang, Xinggang}, title = {DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7664-7674} }
Monocular and Generalizable Gaussian Talking Head Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Shengjie and Li, Haojie and Tang, Jiapeng and Hu, Dongming and Huang, Shuangping and Chen, Hao and Chen, Tianshui and Liu, Zhuoman}, title = {Monocular and Generalizable Gaussian Talking Head Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5523-5534} }
Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Cheng and Li, Ao and Yao, Hu and Zhu, Ce and Zhang, Le}, title = {Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14954-14964} }
SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Xuan and Xiang, Jijun and Wang, Xianqi and Liu, Longliang and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin}, title = {SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16619-16628} }
Locally Orderless Images for Optimization in Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehta_2025_CVPR, author = {Mehta, Ishit and Chandraker, Manmohan and Ramamoorthi, Ravi}, title = {Locally Orderless Images for Optimization in Differentiable Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5763-5772} }
Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Azam_2025_CVPR, author = {Azam, Basim and Akhtar, Naveed}, title = {Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2976-2985} }
Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Eunji and Kim, Siwon and Park, Minjun and Entezari, Rahim and Yoon, Sungroh}, title = {Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13361-13370} }
FLAIR: VLM with Fine-grained Language-informed Image Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Rui and Kim, Sanghwan and Georgescu, Mariana-Iuliana and Akata, Zeynep and Alaniz, Stephan}, title = {FLAIR: VLM with Fine-grained Language-informed Image Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24884-24894} }
GG-SSMs: Graph-Generating State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zubic_2025_CVPR, author = {Zubic, Nikola and Scaramuzza, Davide}, title = {GG-SSMs: Graph-Generating State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28863-28873} }
Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barda_2025_CVPR, author = {Barda, Amir and Gadelha, Matheus and Kim, Vladimir G. and Aigerman, Noam and Bermano, Amit H. and Groueix, Thibault}, title = {Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16273-16282} }
STDD: Spatio-Temporal Dual Diffusion for Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Shuaizhen and Zhang, Xiaoya and Liu, Xin and Liu, Mengyi and Cui, Zhen}, title = {STDD: Spatio-Temporal Dual Diffusion for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12575-12584} }
Implicit Correspondence Learning for Image-to-Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xinjun and Yang, Wenfei and Deng, Jiacheng and Cheng, Zhixin and Zhou, Xu and Zhang, Tianzhu}, title = {Implicit Correspondence Learning for Image-to-Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16922-16931} }
Continuous Adverse Weather Removal via Degradation-Aware Distillation-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Xin and Xiao, Jie and Zhu, Yurui and Fu, Xueyang}, title = {Continuous Adverse Weather Removal via Degradation-Aware Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28113-28123} }
Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thakral_2025_CVPR, author = {Thakral, Kartik and Glaser, Tamar and Hassner, Tal and Vatsa, Mayank and Singh, Richa}, title = {Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9121-9130} }
ILIAS: Instance-Level Image retrieval At Scale-
[pdf]
[supp]
[bibtex]@InProceedings{Kordopatis-Zilos_2025_CVPR, author = {Kordopatis-Zilos, Giorgos and Stojni\'c, Vladan and Manko, Anna and Suma, Pavel and Ypsilantis, Nikolaos-Antonios and Efthymiadis, Nikos and Laskar, Zakaria and Matas, Jiri and Chum, Ondrej and Tolias, Giorgos}, title = {ILIAS: Instance-Level Image retrieval At Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14777-14787} }
Exploiting Temporal State Space Sharing for Video Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hesham_2025_CVPR, author = {Hesham, Syed Ariff Syed and Liu, Yun and Sun, Guolei and Ding, Henghui and Yang, Jing and Konukoglu, Ender and Geng, Xue and Jiang, Xudong}, title = {Exploiting Temporal State Space Sharing for Video Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24211-24221} }
DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yongqi and Ye, Peng and Huang, Chenyu and Cao, Jianjian and Zhang, Lin and Li, Baopu and Yu, Gang and Chen, Tao}, title = {DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10056-10066} }
GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Haifeng and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11525-11535} }
SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Junchen and Cao, Si-Yuan and Zhang, Runmin and Zhang, Chenghao and Yu, Zhu and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang}, title = {SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16685-16694} }
High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Yiyang and Zhou, Kun and Wang, He and Yang, Yin and Shao, Tianjia}, title = {High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21558-21569} }
Steepest Descent Density Control for Compact 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Peihao and Wang, Yuehao and Wang, Dilin and Mohan, Sreyas and Fan, Zhiwen and Wu, Lemeng and Cai, Ruisi and Yeh, Yu-Ying and Wang, Zhangyang and Liu, Qiang and Ranjan, Rakesh}, title = {Steepest Descent Density Control for Compact 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26663-26672} }
Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhuowei and Zhao, Tianchen and Xu, Xiang and Zhang, Zheng and Li, Zhihua and Chen, Xuanbai and Zhang, Qin and Bergamo, Alessandro and Jain, Anil K. and Xing, Yifan}, title = {Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24351-24363} }
USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kang and Zhang, Jiyuan and Hao, Zecheng and Zheng, Yajing and Huang, Tiejun and Yu, Zhaofei}, title = {USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16609-16618} }
Robust 3D Shape Reconstruction in Zero-Shot from a Single Image in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Junhyeong and Youwang, Kim and Yang, Hunmin and Oh, Tae-Hyun}, title = {Robust 3D Shape Reconstruction in Zero-Shot from a Single Image in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22786-22798} }
BOE-ViT: Boosting Orientation Estimation with Equivariance in Self-Supervised 3D Subtomogram Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Runmin and Daggett, Jackson and Pingulkar, Shriya and Zhao, Yizhou and Dhingra, Priyanshu and Brown, Daniel and Wu, Qifeng and Zeng, Xiangrui and Li, Xingjian and Xu, Min}, title = {BOE-ViT: Boosting Orientation Estimation with Equivariance in Self-Supervised 3D Subtomogram Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29352-29362} }
Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Huaxin and Xu, Xiaohao and Wang, Xiang and Zuo, Jialong and Huang, Xiaonan and Gao, Changxin and Zhang, Shanjun and Yu, Li and Sang, Nong}, title = {Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13843-13853} }
Adventurer: Optimizing Vision Mamba Architecture Designs for Efficiency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Feng and Yang, Timing and Yu, Yaodong and Ren, Sucheng and Wei, Guoyizhe and Wang, Angtian and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang}, title = {Adventurer: Optimizing Vision Mamba Architecture Designs for Efficiency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30157-30166} }
Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caldarola_2025_CVPR, author = {Caldarola, Debora and Cagnasso, Pietro and Caputo, Barbara and Ciccone, Marco}, title = {Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25187-25197} }
ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Dong and Sun, Xiaoning and Gao, Xizhan and Hu, Shengxiang and Sun, Huaijiang}, title = {ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1861-1870} }
Parameterized Blur Kernel Prior Learning for Local Motion Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Zhenxuan and Wu, Fangfang and Huang, Tao and Dong, Le and Dong, Weisheng and Li, Xin and Shi, Guangming}, title = {Parameterized Blur Kernel Prior Learning for Local Motion Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23006-23015} }
QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Xuan and Ma, Weize and Liu, Jing and Yang, Changdi and Ding, Rui and Wang, Quanyi and Ding, Henghui and Niu, Wei and Wang, Yanzhi and Zhao, Pu and Lin, Jun and Gu, Jiuxiang}, title = {QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11448-11460} }
ReWind: Understanding Long Videos with Instructed Learnable Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Diko_2025_CVPR, author = {Diko, Anxhelo and Wang, Tinghuai and Swaileh, Wassim and Sun, Shiyan and Patras, Ioannis}, title = {ReWind: Understanding Long Videos with Instructed Learnable Memory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13734-13743} }
Sufficient Invariant Learning for Distribution Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Taero and Park, Subeen and Lim, Sungjun and Jung, Yonghan and Muandet, Krikamol and Song, Kyungwoo}, title = {Sufficient Invariant Learning for Distribution Shift}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4958-4967} }
DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2025_CVPR, author = {Ju, Xiaoliang and Li, Hongsheng}, title = {DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16229-16239} }
Domain Generalization in CLIP via Learning with Diverse Text Prompts-
[pdf]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Changsong and Peng, Zelin and Huang, Yu and Yang, Xiaokang and Shen, Wei}, title = {Domain Generalization in CLIP via Learning with Diverse Text Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9559-9569} }
Scene4U: Hierarchical Layered 3D Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zilong and He, Jun and Ye, Junyan and Jiang, Lihan and Li, Weijia and Chen, Yiping and Han, Ting}, title = {Scene4U: Hierarchical Layered 3D Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26723-26733} }
Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zhiyang and Xiang, Jinxu and Ma, Kai and Zhou, Wengang and Li, Houqiang and Zhang, Ran}, title = {Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10783-10792} }
IterIS: Iterative Inference-Solving Alignment for LoRA Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hongxu and Wang, Zhen and Li, Runshi and Zhu, Bowei and Chen, Long}, title = {IterIS: Iterative Inference-Solving Alignment for LoRA Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4829-4838} }
ACAttack: Adaptive Cross Attacking RGB-T Tracker via Multi-Modal Response Decoupling-
[pdf]
[bibtex]@InProceedings{Xiang_2025_CVPR, author = {Xiang, Xinyu and Yan, Qinglong and Zhang, Hao and Ma, Jiayi}, title = {ACAttack: Adaptive Cross Attacking RGB-T Tracker via Multi-Modal Response Decoupling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22099-22108} }
DeCafNet: Delegate and Conquer for Efficient Temporal Grounding in Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Zijia and Iftekhar, A S M and Mittal, Gaurav and Meng, Tianjian and Wang, Xiawei and Zhao, Cheng and Kukkala, Rohith and Elhamifar, Ehsan and Chen, Mei}, title = {DeCafNet: Delegate and Conquer for Efficient Temporal Grounding in Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24066-24076} }
Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Shu and Yu, Chengting and Liu, Lei and Ma, Hanzhi and Wang, Aili and Li, Erping}, title = {Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10025-10035} }
PrEditor3D: Fast and Precise 3D Shape Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Erkoc_2025_CVPR, author = {Erko\c{c}, Ziya and G\"umeli, Can and Wang, Chaoyang and Nie{\ss}ner, Matthias and Dai, Angela and Wonka, Peter and Lee, Hsin-Ying and Zhuang, Peiye}, title = {PrEditor3D: Fast and Precise 3D Shape Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {640-649} }
Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiangtao and Li, Sheng and Li, Ao and Liu, Yipeng and Zhang, Fan and Zhu, Ce and Zhang, Le}, title = {Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20632-20642} }
HoGS: Unified Near and Far Object Reconstruction via Homogeneous Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xinpeng and Huang, Zeyi and Okura, Fumio and Matsushita, Yasuyuki}, title = {HoGS: Unified Near and Far Object Reconstruction via Homogeneous Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26714-26722} }
SmartEraser: Remove Anything from Images using Masked-Region Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Longtao and Wang, Zhendong and Bao, Jianmin and Zhou, Wengang and Chen, Dongdong and Shi, Lei and Chen, Dong and Li, Houqiang}, title = {SmartEraser: Remove Anything from Images using Masked-Region Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24452-24462} }
ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hao and Jiang, Tangyu and Jia, Shuning and Yan, Shannan and Liu, Shunning and Qian, Haolong and Li, Guanghao and Dong, Shuting and Yuan, Chun}, title = {ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4508-4517} }
Sample- and Parameter-Efficient Auto-Regressive Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Amrani_2025_CVPR, author = {Amrani, Elad and Karlinsky, Leonid and Bronstein, Alex}, title = {Sample- and Parameter-Efficient Auto-Regressive Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30127-30136} }
Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Chen and Li, Peike and Yang, Liying and Wang, Dadong and Li, Lincheng and Yu, Xin}, title = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28922-28931} }
LOCORE: Image Re-ranking with Long-Context Sequence Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Zilin and Suma, Pavel and Sachdeva, Ayush and Wang, Hao-Jen and Kordopatis-Zilos, Giorgos and Tolias, Giorgos and Ordonez, Vicente}, title = {LOCORE: Image Re-ranking with Long-Context Sequence Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9580-9590} }
NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Jia, Emily Yue-ting and Zhou, Junsheng and Ma, Baorui and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11317-11327} }
BiLoRA: Almost-Orthogonal Parameter Spaces for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Hao and Zhang, Yifei and Dong, Junhao and Koniusz, Piotr}, title = {BiLoRA: Almost-Orthogonal Parameter Spaces for Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25613-25622} }
Vid2Sim: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Chuhao and Dou, Zhiyang and Wang, Chen and Huang, Yiming and Chen, Anjun and Feng, Qiao and Gu, Jiatao and Liu, Lingjie}, title = {Vid2Sim: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26545-26555} }
SceneTAP: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Yue and Xing, Yun and Zhang, Jie and Lin, Di and Zhang, Tianwei and Tsang, Ivor and Liu, Yang and Guo, Qing}, title = {SceneTAP: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25050-25059} }
Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zigeng and Ma, Xinyin and Fang, Gongfan and Wang, Xinchao}, title = {Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23334-23344} }
AerialMegaDepth: Learning Aerial-Ground Reconstruction and View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuong_2025_CVPR, author = {Vuong, Khiem and Ghosh, Anurag and Ramanan, Deva and Narasimhan, Srinivasa and Tulsiani, Shubham}, title = {AerialMegaDepth: Learning Aerial-Ground Reconstruction and View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21674-21684} }
Towards Training-free Anomaly Detection with Vision and Language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinjin and Wang, Guodong and Jin, Yizhou and Huang, Di}, title = {Towards Training-free Anomaly Detection with Vision and Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15204-15213} }
LiVOS: Light Video Object Segmentation with Gated Linear Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Qin and Wang, Jianfeng and Yang, Zhengyuan and Li, Linjie and Lin, Kevin and Niethammer, Marc and Wang, Lijuan}, title = {LiVOS: Light Video Object Segmentation with Gated Linear Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8668-8678} }
Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration-
[pdf]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Lianxin and Zheng, Bingbing and Wu, Si and Wong, Hau San}, title = {Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17821-17830} }
Polarized Color Screen Matting-
[pdf]
[supp]
[bibtex]@InProceedings{Enomoto_2025_CVPR, author = {Enomoto, Kenji and Cohen, Scott and Price, Brian and Rhodes, TJ}, title = {Polarized Color Screen Matting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {391-399} }
Visual Representation Learning through Causal Intervention for Controllable Image Editing-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Wang, Lei and Liao, Guorui and Gong, Zhili and Yang, Huayi and Liu, Li}, title = {Visual Representation Learning through Causal Intervention for Controllable Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23484-23493} }
Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Bingda and Zheng, Boyang and Paul, Sayak and Xie, Saining}, title = {Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28586-28595} }
A Comprehensive Study of Decoder-Only LLMs for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Andrew Z. and Ge, Songwei and Karras, Tero and Liu, Ming-Yu and Balaji, Yogesh}, title = {A Comprehensive Study of Decoder-Only LLMs for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28575-28585} }
Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jiapeng and Yang, Ceyuan and Zheng, Kecheng and Xu, Yinghao and Shi, Zifan and Zhang, Yifei and Chen, Qifeng and Shen, Yujun}, title = {Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18411-18423} }
Deformable Radial Kernel Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yi-Hua and Lin, Ming-Xian and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan}, title = {Deformable Radial Kernel Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21513-21523} }
GOAL: Global-local Object Alignment Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Hyungyu and Jang, Young Kyun and Eom, Chanho}, title = {GOAL: Global-local Object Alignment Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4070-4079} }
Bayesian Prompt Flow Learning for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Chen, Qiyu and Zhang, Zhengtao and Wang, Xingang and Ding, Guiguang}, title = {Bayesian Prompt Flow Learning for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30398-30408} }
Post-pre-training for Modality Alignment in Vision-Language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2025_CVPR, author = {Yamaguchi, Shin'ya and Feng, Dewei and Kanai, Sekitoshi and Adachi, Kazuki and Chijiwa, Daiki}, title = {Post-pre-training for Modality Alignment in Vision-Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4256-4266} }
Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmed_2025_CVPR, author = {Ahmed, Soikat Hasan and Finkbeiner, Jan and Neftci, Emre}, title = {Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13970-13979} }
SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaturvedi_2025_CVPR, author = {Chaturvedi, Sumit and Ren, Mengwei and Hold-Geoffroy, Yannick and Liu, Jingyuan and Dorsey, Julie and Shu, Zhixin}, title = {SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {369-379} }
Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ting and Ye, Mao and Wu, Tianwen and Li, Nianxin and Li, Shuaifeng and Tang, Song and Ji, Luping}, title = {Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6710-6719} }
HUNet: Homotopy Unfolding Network for Image Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Feiyang and Gan, Hongping}, title = {HUNet: Homotopy Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12799-12808} }
HalLoc: Token-level Localization of Hallucinations for Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Eunkyu and Kim, Minyeong and Kim, Gunhee}, title = {HalLoc: Token-level Localization of Hallucinations for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29893-29903} }
DiffPortrait360: Consistent Portrait Diffusion for 360 View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuming and Tran, Phong and Zheng, Yujian and Xu, Hongyi and Li, Heyuan and Karmanov, Adilbek and Li, Hao}, title = {DiffPortrait360: Consistent Portrait Diffusion for 360 View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26263-26273} }
SURGEON: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Ke and Tang, Jiaqi and Guo, Bin and Dang, Fan and Liu, Sicong and Zhu, Zhui and Wu, Lei and Fang, Cheng and Chen, Ying-Cong and Yu, Zhiwen and Liu, Yunhao}, title = {SURGEON: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30514-30523} }
NVILA: Efficient Frontier Visual Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhijian and Zhu, Ligeng and Shi, Baifeng and Zhang, Zhuoyang and Lou, Yuming and Yang, Shang and Xi, Haocheng and Cao, Shiyi and Gu, Yuxian and Li, Dacheng and Li, Xiuyu and Tang, Haotian and Fang, Yunhao and Chen, Yukang and Hsieh, Cheng-Yu and Huang, De-An and Cheng, An-Chieh and Hu, Jinyi and Liu, Sifei and Krishna, Ranjay and Molchanov, Pavlo and Kautz, Jan and Yin, Hongxu and Han, Song and Lu, Yao}, title = {NVILA: Efficient Frontier Visual Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4122-4134} }
SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Dongliang and Zhu, Hanshen and Zhang, Ziyang and Liang, Dingkang and Xie, Xudong and Liu, Yuliang and Bai, Xiang}, title = {SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9329-9338} }
See Further When Clear: Curriculum Consistency Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yunpeng and Liu, Boxiao and Zhang, Yi and Hou, Xingzhong and Song, Guanglu and Liu, Yu and You, Haihang}, title = {See Further When Clear: Curriculum Consistency Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18103-18112} }
From Slow Bidirectional to Fast Autoregressive Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Tianwei and Zhang, Qiang and Zhang, Richard and Freeman, William T. and Durand, Fredo and Shechtman, Eli and Huang, Xun}, title = {From Slow Bidirectional to Fast Autoregressive Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22963-22974} }
PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Libo and Li, Jianze and Qin, Haotong and Li, Wenbo and Zhang, Yulun and Guo, Yong and Yang, Xiaokang}, title = {PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12778-12788} }
RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Qiyu and Ni, Xingyu and Shen, Qianfan and Chen, Wenzheng and Chen, Baoquan and Chu, Mengyu}, title = {RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16153-16162} }
Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Boming and Li, Chunxiao and Wang, Xiaoxiao and Zhang, Andi and Sun, Rui and Wang, Zizhe and Zhu, Yao}, title = {Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23575-23584} }
MonoInstance: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Yang, Yixiao and Huang, Han and Han, Liang and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {MonoInstance: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21642-21653} }
Three-view Focal Length Recovery From Homographies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Yaqing and Kocur, Viktor and Haladova, Zuzana Berger and Wu, Qianliang and Cai, Shen and Yang, Jian and Kukelova, Zuzana}, title = {Three-view Focal Length Recovery From Homographies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11505-11514} }
NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zezeng and Du, Xiaoyu and Lei, Na and Chen, Liming and Wang, Weimin}, title = {NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3492-3502} }
RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Haoran and Han, Jiaming and Li, Changsheng and Li, Yu-Feng and Yue, Xiangyu}, title = {RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14538-14548} }
FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Tianyun and Liang, Chao and Jiang, Jianwen and Lin, Gaojie and Yang, Jiaqi and Zhao, Zhou}, title = {FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3101-3110} }
CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Rundi and Gao, Ruiqi and Poole, Ben and Trevithick, Alex and Zheng, Changxi and Barron, Jonathan T. and Holynski, Aleksander}, title = {CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26057-26068} }
Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Liao, Xiangyu and Li, Mingyang and Guo, Guihuan and Ren, Chao}, title = {Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28176-28187} }
Distilling Long-tailed Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhenghao and Wang, Haoxuan and Shang, Yuzhang and Wang, Kai and Yan, Yan}, title = {Distilling Long-tailed Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30609-30618} }
Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2025_CVPR, author = {Ryan, Fiona and Bati, Ajay and Lee, Sangmin and Bolya, Daniel and Hoffman, Judy and Rehg, James M.}, title = {Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28874-28884} }
Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Chanyoung and Ju, Dayun and Han, Woojung and Yang, Ming-Hsuan and Hwang, Seong Jae}, title = {Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15033-15042} }
Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Yuhao and Zu, Xinxing and Zhang, Wenhua and Zhao, Zhongzhou and Gao, Jinyang}, title = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29733-29743} }
Geometry Field Splatting with Gaussian Surfels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Kaiwen and Sivaram, Venkataram and Peng, Cheng and Ramamoorthi, Ravi}, title = {Geometry Field Splatting with Gaussian Surfels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5752-5762} }
Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Linyi and Tucker, Richard and Li, Zhengqi and Fouhey, David and Snavely, Noah and Holynski, Aleksander}, title = {Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10497-10509} }
PS-EIP: Robust Photometric Stereo Based on Event Interval Profile-
[pdf]
[supp]
[bibtex]@InProceedings{Kitazawa_2025_CVPR, author = {Kitazawa, Kazuma and Aoto, Takahito and Ikehata, Satoshi and Takatani, Tsuyoshi}, title = {PS-EIP: Robust Photometric Stereo Based on Event Interval Profile}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6241-6251} }
GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, An and Zhu, Zhe and Wei, Mingqiang}, title = {GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1308-1318} }
FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kefan and Min, Chaerin and Zhang, Linguang and Hampali, Shreyas and Keskin, Cem and Sridhar, Srinath}, title = {FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17448-17460} }
InterDyn: Controllable Interactive Dynamics with Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akkerman_2025_CVPR, author = {Akkerman, Rick and Feng, Haiwen and Black, Michael J. and Tzionas, Dimitrios and Abrevaya, Victoria Fern{\'a}ndez}, title = {InterDyn: Controllable Interactive Dynamics with Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12467-12479} }
LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Shenghao and Yang, Qize and Mo, Qijie and Yan, Junkai and Wei, Xihan and Meng, Jingke and Xie, Xiaohua and Zheng, Wei-Shi}, title = {LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14987-14997} }
Boost Your Human Image Generation Model via Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2025_CVPR, author = {Na, Sanghyeon and Kim, Yonggyu and Lee, Hyunjoon}, title = {Boost Your Human Image Generation Model via Direct Preference Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23551-23562} }
Learning to Highlight Audio by Watching Movies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chao and Gao, Ruohan and Tsang, J. M. F. and Kurcius, Jan and Bilen, Cagdas and Xu, Chenliang and Kumar, Anurag and Parekh, Sanjeel}, title = {Learning to Highlight Audio by Watching Movies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23925-23935} }
Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Capellera_2025_CVPR, author = {Capellera, Guillem and Rubio, Antonio and Ferraz, Luis and Agudo, Antonio}, title = {Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22476-22486} }
WeGen: A Unified Model for Interactive Multimodal Generation as We Chat-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhipeng and Zhuang, Shaobin and Fu, Canmiao and Yang, Binxin and Zhang, Ying and Sun, Chong and Zhang, Zhizheng and Wang, Yali and Li, Chen and Zha, Zheng-Jun}, title = {WeGen: A Unified Model for Interactive Multimodal Generation as We Chat}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23679-23689} }
HRAvatar: High-Quality and Relightable Gaussian Head Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Chen, Kangjie and Qin, Minghan and Li, Yu and Wang, Haoqian}, title = {HRAvatar: High-Quality and Relightable Gaussian Head Avatar}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26285-26296} }
Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeganeh_2025_CVPR, author = {Yeganeh, Yousef and Farshad, Azade and Charisiadis, Ioannis and Hasny, Marta and Hartenberger, Martin and Ommer, Bj{\"o}rn and Navab, Nassir and Adeli, Ehsan}, title = {Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7685-7695} }
Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Yichen and Wang, Shuai and Zhang, Dehao and Wei, Wenjie and Shan, Yimeng and Liu, Xiaoli and Jiang, Yulin and Zhang, Malu}, title = {Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5444-5454} }
MagicQuill: An Intelligent Interactive Image Editing System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Wang, Wen and Liu, Zhiheng and Chen, Qifeng and Shen, Yujun}, title = {MagicQuill: An Intelligent Interactive Image Editing System}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13072-13082} }
HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Shaocheng and Wang, Yiming and Zhao, Kaiyan and Shi, Pengcheng and Zhao, Zhenjun and Zhang, Yongjun and Li, Jiayuan}, title = {HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1363-1373} }
Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Huitong and Wang, Yu and Fan, Yan and Jiang, Guosong and Hu, Qinghua}, title = {Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10121-10130} }
Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyangguang and Delitzas, Alexandros and Wang, Fangjinhua and Zhang, Ruida and Ji, Xiangyang and Pollefeys, Marc and Engelmann, Francis}, title = {Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19401-19413} }
Boosting Adversarial Transferability through Augmentation in Hypothesis Space-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yu and Liu, Weiquan and Xu, Qingshan and Zheng, Shijun and Huang, Shujun and Zang, Yu and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {Boosting Adversarial Transferability through Augmentation in Hypothesis Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19175-19185} }
AniMo: Species-Aware Model for Text-Driven Animal Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xuan and Ruan, Kai and Zhang, Xing and Wang, Gaoang}, title = {AniMo: Species-Aware Model for Text-Driven Animal Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1929-1939} }
EditAR: Unified Conditional Generation with Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2025_CVPR, author = {Mu, Jiteng and Vasconcelos, Nuno and Wang, Xiaolong}, title = {EditAR: Unified Conditional Generation with Autoregressive Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7899-7909} }
Instance-wise Supervision-level Optimization in Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuo_2025_CVPR, author = {Matsuo, Shinnosuke and Togashi, Riku and Bise, Ryoma and Uchida, Seiichi and Nomura, Masahiro}, title = {Instance-wise Supervision-level Optimization in Active Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4939-4947} }
ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Djeghim_2025_CVPR, author = {Djeghim, Hala and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Sidib{\'e}, D{\'e}sir{\'e}}, title = {ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11854-11863} }
Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xuanbai and Xu, Xiang and Li, Zhihua and Zhao, Tianchen and Perona, Pietro and Zhang, Qin and Xing, Yifan}, title = {Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14281-14292} }
BHViT: Binarized Hybrid Vision Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Tian and Zhang, Yu and Zhang, Zhiyuan and Liu, Huajun and Yin, Kaijie and Xu, Chengzhong and Kong, Hui}, title = {BHViT: Binarized Hybrid Vision Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3563-3572} }
UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mittal_2025_CVPR, author = {Mittal, Himangi and Zhuang, Peiye and Lee, Hsin-Ying and Tulsiani, Shubham}, title = {UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16208-16218} }
STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Tianqing and Yu, Kairong and Zhong, Xian and Wang, Hongwei and Xu, Qi and Zhang, Qiang}, title = {STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13959-13969} }
Pathways on the Image Manifold: Image Editing via Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rotstein_2025_CVPR, author = {Rotstein, Noam and Yona, Gal and Silver, Daniel and Velich, Roy and Bensaid, David and Kimmel, Ron}, title = {Pathways on the Image Manifold: Image Editing via Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7857-7866} }
DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yihao and Klasson, Marcus and Turkulainen, Matias and Wang, Shuzhe and Kannala, Juho and Solin, Arno}, title = {DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {722-732} }
Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Zijian and Jia, Wangwang and Zhang, Xingxing and Zhou, Dulan and Xu, Kele and Dawei, Feng and Dou, Yong and Mao, Xinjun and Wang, Huaimin}, title = {Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20523-20533} }
A Distractor-Aware Memory for Visual Object Tracking with SAM2-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Videnovic_2025_CVPR, author = {Videnovic, Jovana and Lukezic, Alan and Kristan, Matej}, title = {A Distractor-Aware Memory for Visual Object Tracking with SAM2}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24255-24264} }
Activating Sparse Part Concepts for 3D Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Zhenya and Xiao, Jun and Liu, Lupeng and Jiang, Haiyong}, title = {Activating Sparse Part Concepts for 3D Class Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30343-30353} }
ProxyTransformation: Preshaping Point Cloud Manifold With Proxy Attention For 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Qihang and Zheng, Henry and Huang, Gao}, title = {ProxyTransformation: Preshaping Point Cloud Manifold With Proxy Attention For 3D Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24582-24592} }
BFANet: Revisiting 3D Semantic Segmentation with Boundary Feature Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Weiguang and Zhang, Rui and Wang, Qiufeng and Cheng, Guangliang and Huang, Kaizhu},
  title     = {{BFANet}: Revisiting {3D} Semantic Segmentation with Boundary Feature Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29395--29405},
}
Stable Flow: Vital Layers for Training-Free Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Avrahami_2025_CVPR,
  author    = {Avrahami, Omri and Patashnik, Or and Fried, Ohad and Nemchinov, Egor and Aberman, Kfir and Lischinski, Dani and Cohen-Or, Daniel},
  title     = {Stable Flow: Vital Layers for Training-Free Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7877--7888},
}
Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Reddy_2025_CVPR,
  author    = {Reddy, Arun and Martin, Alexander and Yang, Eugene and Yates, Andrew and Sanders, Kate and Murray, Kenton and Kriz, Reno and de Melo, Celso M. and Van Durme, Benjamin and Chellappa, Rama},
  title     = {{Video-ColBERT}: Contextualized Late Interaction for Text-to-Video Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19691--19701},
}
Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Hairui and Tang, Fan and Zhao, He and Wang, Zixuan and Guo, Dandan and Chang, Yi},
  title     = {Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25135--25144},
}
Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dongkwan and Hwang, Kyomin and Kwak, Nojun},
  title     = {Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30599--30608},
}
TokenMotion: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruineng and Xing, Daitao and Sun, Huiming and Ha, Yuanzhou and Shen, Jinglin and Ho, Chiuman},
  title     = {{TokenMotion}: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1951--1961},
}
CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nwoye_2025_CVPR,
  author    = {Nwoye, Chinedu Innocent and Elgohary, Kareem and Srinivas, Anvita and Zaid, Fauzan and Lavanchy, Jo{\"e}l L. and Padoy, Nicolas},
  title     = {{CholecTrack20}: A Multi-Perspective Tracking Dataset for Surgical Tools},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8942--8952},
}
Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Huajie and Li, Zhengxian and Yu, Xiaohan and Hu, Yongli and Yin, Baocai and Yang, Jian and Qi, Yuankai},
  title     = {Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20275--20285},
}
Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Han and Wang, Gang and Zhang, Huan},
  title     = {Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29947--29957},
}
Neural LightRig: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR,
  author    = {He, Zexin and Wang, Tengfei and Huang, Xin and Pan, Xingang and Liu, Ziwei},
  title     = {Neural {LightRig}: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26514--26524},
}
VidMuse: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zeyue and Liu, Zhaoyang and Yuan, Ruibin and Pan, Jiahao and Liu, Qifeng and Tan, Xu and Chen, Qifeng and Xue, Wei and Guo, Yike},
  title     = {{VidMuse}: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18782--18793},
}
Human-centered Interactive Learning via MLLMs for Text-to-Image Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Yang and Chen, Chao and Fu, Zhihang and Peng, Dezhong and Peng, Xi and Hu, Peng},
  title     = {Human-centered Interactive Learning via {MLLMs} for Text-to-Image Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14390--14399},
}
Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cohen_2025_CVPR,
  author    = {Cohen, Nadav Z. and Nir, Oron and Shamir, Ariel},
  title     = {Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2641--2650},
}
KeyFace: Expressive Audio-Driven Facial Animation for Long Sequences via KeyFrame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Bigata_2025_CVPR,
  author    = {Bigata, Antoni and Stypu{\l}kowski, Micha{\l} and Mira, Rodrigo and Bounareli, Stella and Vougioukas, Konstantinos and Landgraf, Zoe and Drobyshev, Nikita and Zieba, Maciej and Petridis, Stavros and Pantic, Maja},
  title     = {{KeyFace}: Expressive Audio-Driven Facial Animation for Long Sequences via {KeyFrame} Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5477--5488},
}
Context-Enhanced Memory-Refined Transformer for Online Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Zhanzhong and Sener, Fadime and Yao, Angela},
  title     = {Context-Enhanced Memory-Refined Transformer for Online Action Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8700--8710},
}
Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Qin, Xugong and Yang, Jun Jie Ou and Zhang, Peng and Zeng, Gangyan and Li, Yubo and Lin, Hailun},
  title     = {Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29722--29732},
}
Mitigating Ambiguities in 3D Classification with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ruiqi and Zhu, Hao and Zhao, Jingyi and Zhang, Qi and Cao, Xun and Ma, Zhan},
  title     = {Mitigating Ambiguities in {3D} Classification with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27275--27284},
}
Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Donggoo and Kim, Daehyun and Wang, Guanghui and Kim, Tae Hyun},
  title     = {Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17892--17901},
}
Data-Free Group-Wise Fully Quantized Winograd Convolution via Learnable Scales-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Shuokai and Tuzi, Gerti and Sreeram, Sudarshan and Gope, Dibakar},
  title     = {Data-Free Group-Wise Fully Quantized {Winograd} Convolution via Learnable Scales},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4091--4100},
}
EdgeDiff: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yujun and Wang, Ruisheng and Huang, Shangfeng and Cai, Guorong},
  title     = {{EdgeDiff}: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17008--17018},
}
GEN3C: 3D-Informed World-Consistent Video Generation with Precise Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xuanchi and Shen, Tianchang and Huang, Jiahui and Ling, Huan and Lu, Yifan and Nimier-David, Merlin and M{\"u}ller, Thomas and Keller, Alexander and Fidler, Sanja and Gao, Jun},
  title     = {{GEN3C}: {3D}-Informed World-Consistent Video Generation with Precise Camera Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6121--6132},
}
A Dataset for Semantic Segmentation in the Presence of Unknowns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Laskar_2025_CVPR,
  author    = {Laskar, Zakaria and Vojir, Tomas and Grcic, Matej and Melekhov, Iaroslav and Gangisetty, Shankar and Kannala, Juho and Matas, Jiri and Tolias, Giorgos and Jawahar, C. V.},
  title     = {A Dataset for Semantic Segmentation in the Presence of Unknowns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1439--1448},
}
HierarQ: Task-Aware Hierarchical Q-Former for Enhanced Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Azad_2025_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh Singh},
  title     = {{HierarQ}: Task-Aware Hierarchical {Q-Former} for Enhanced Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8545--8556},
}
DeNVeR: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chun-Hung and Chen, Shih-Hong and Hu, Chih-Yao and Wu, Hsin-Yu and Chen, Kai-Hsin and Chen, Yu-You and Su, Chih-Hai and Lee, Chih-Kuo and Liu, Yu-Lun},
  title     = {{DeNVeR}: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15682--15692},
}
DH-Set: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kun and Li, Jingyu and Li, Zhe and Zhou, S. Kevin},
  title     = {{DH-Set}: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24993--25003},
}
Task-Aware Clustering for Prompting Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Fusheng and He, Fengxiang and Wu, Fuxiang and Wang, Tichao and Song, Chengqun and Cheng, Jun},
  title     = {Task-Aware Clustering for Prompting Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14745--14755},
}
FSboard: Over 3 Million Characters of ASL Fingerspelling Collected via Smartphones-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Georg_2025_CVPR,
  author    = {Georg, Manfred and Tanzer, Garrett and Uboweja, Esha and Hassan, Saad and Shengelia, Maximus and Sepah, Sam and Forbes, Sean and Starner, Thad},
  title     = {{FSboard}: Over 3 Million Characters of {ASL} Fingerspelling Collected via Smartphones},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13897--13906},
}
CASP: Compression of Large Multimodal Models Based on Attention Sparsity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gholami_2025_CVPR,
  author    = {Gholami, Mohsen and Akbari, Mohammad and Cannons, Kevin and Zhang, Yong},
  title     = {{CASP}: Compression of Large Multimodal Models Based on Attention Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9372--9381},
}
UNIC-Adapter: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Lunhao and Zhao, Shanshan and Yan, Wenjun and Li, Yinglun and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Gong, Mingming and Xia, Gui-Song},
  title     = {{UNIC-Adapter}: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7963--7973},
}
Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Tianxiang and Liu, Ningzhong and Sun, Han},
  title     = {Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10163--10172},
}
Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Pengcheng and Jiang, Boyuan and Hu, Xiaobin and Luo, Donghao and He, Qingdong and Zhang, Jiangning and Wang, Chengjie and Wu, Yunsheng and Ling, Charles and Wang, Boyu},
  title     = {Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28479--28489},
}
Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of 3D Volumes-
[pdf]
[supp]
[bibtex]@InProceedings{Leonard_2025_CVPR,
  author    = {Leonard, Ludwic and Thurey, Nils and Westermann, R{\"u}diger},
  title     = {Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of {3D} Volumes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16163--16174},
}
DyCON: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Assefa_2025_CVPR,
  author    = {Assefa, Maregu and Naseer, Muzammal and Ganapathi, Iyyakutti Iyappan and Ali, Syed Sadaf and Seghier, Mohamed L. and Werghi, Naoufel},
  title     = {{DyCON}: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30850--30860},
}
STiL: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR,
  author    = {Du, Siyi and Luo, Xinzhe and O'Regan, Declan P. and Qin, Chen},
  title     = {{STiL}: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15549--15559},
}
DUNE: Distilling a Universal Encoder from Heterogeneous 2D and 3D Teachers-
[pdf]
[supp]
[bibtex]@InProceedings{Sariyildiz_2025_CVPR,
  author    = {Sar{\i}y{\i}ld{\i}z, Mert B{\"u}lent and Weinzaepfel, Philippe and Lucas, Thomas and de Jorge, Pau and Larlus, Diane and Kalantidis, Yannis},
  title     = {{DUNE}: Distilling a Universal Encoder from Heterogeneous {2D} and {3D} Teachers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30084--30094},
}
Black Hole-Driven Identity Absorbing in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Shaheryar_2025_CVPR,
  author    = {Shaheryar, Muhammad and Lee, Jong Taek and Jung, Soon Ki},
  title     = {Black Hole-Driven Identity Absorbing in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28544--28554},
}
HiRes-LLaVA: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Runhui and Ding, Xinpeng and Wang, Chunwei and Han, Jianhua and Liu, Yulong and Zhao, Hengshuang and Xu, Hang and Hou, Lu and Zhang, Wei and Liang, Xiaodan},
  title     = {{HiRes-LLaVA}: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29814--29824},
}
Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Jiahao and Li, Hui and Zhan, Yun and Shang, Hanlin and Cheng, Kaihui and Ma, Yuqi and Mu, Shan and Zhou, Hang and Wang, Jingdong and Zhu, Siyu},
  title     = {Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21086--21095},
}
Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yu and Wang, Xijun and Sheng, Yichen and Chennuri, Prateek and Zhang, Xingguang and Chan, Stanley},
  title     = {Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7920--7930},
}
Advancing Manga Analysis: Comprehensive Segmentation Annotations for the Manga109 Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Minshan and Lin, Jian and Liu, Hanyuan and Li, Chengze and Wong, Tien-Tsin},
  title     = {Advancing Manga Analysis: Comprehensive Segmentation Annotations for the {Manga109} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8869--8878},
}
SeqMvRL: A Sequential Fusion Framework for Multi-view Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ren and Sun, Haoliang and Lin, Yuxiu and Zuo, Chuanhui and Gong, Yongshun and Yin, Yilong and Meng, Wenjia},
  title     = {{SeqMvRL}: A Sequential Fusion Framework for Multi-view Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25822--25831},
}
Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yicheng and Li, Xiangtai and Li, Yining and Zeng, Yanhong and Wu, Jianzong and Zhao, Xiangyu and Chen, Kai},
  title     = {Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19952--19962},
}
EnvGS: Modeling View-Dependent Appearance with Environment Gaussian-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Tao and Chen, Xi and Xu, Zhen and Xie, Yiman and Jin, Yudong and Shen, Yujun and Peng, Sida and Bao, Hujun and Zhou, Xiaowei},
  title     = {{EnvGS}: Modeling View-Dependent Appearance with Environment {Gaussian}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5742--5751},
}
Provoking Multi-modal Few-Shot LVLM via Exploration-Exploitation In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Zhai, Yunpeng and Zhao, Yifan and Gao, Jinyang and Ding, Bolin and Li, Jia},
  title     = {Provoking Multi-modal Few-Shot {LVLM} via Exploration-Exploitation In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3826--3835},
}
BadToken: Token-level Backdoor Attacks to Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Zenghui and Shi, Jiawen and Zhou, Pan and Gong, Neil Zhenqiang and Sun, Lichao},
  title     = {{BadToken}: Token-level Backdoor Attacks to Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29927--29936},
}
ReRAW: RGB-to-RAW Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berdan_2025_CVPR,
  author    = {Berdan, Radu and Besbinar, Beril and Reinders, Christoph and Otsuka, Junji and Iso, Daisuke},
  title     = {{ReRAW}: {RGB}-to-{RAW} Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11833--11843},
}
VLMs-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Haoran and Peng, Peixi and Tan, Guang and Chang, Yiqian and Li, Luntong and Tian, Yonghong},
  title     = {{VLMs}-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29534--29544},
}
MonoDGP: Monocular 3D Object Detection with Decoupled-Query and Geometry-Error Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Fanqi and Wang, Yifan and Deng, Jiru and Yang, Wenming},
  title     = {{MonoDGP}: Monocular {3D} Object Detection with Decoupled-Query and Geometry-Error Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6520--6530},
}
NeISF++: Neural Incident Stokes Field for Polarized Inverse Rendering of Conductors and Dielectrics-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Chenhao and Ono, Taishi and Uemori, Takeshi and Nitta, Sho and Mihara, Hajime and Gatto, Alexander and Nagahara, Hajime and Moriuchi, Yusuke},
  title     = {{NeISF++}: Neural Incident {Stokes} Field for Polarized Inverse Rendering of Conductors and Dielectrics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26493--26503},
}
HunyuanPortrait: Implicit Condition Control for Enhanced Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zunnan and Yu, Zhentao and Zhou, Zixiang and Zhou, Jun and Jin, Xiaoyu and Hong, Fa-ting and Ji, Xiaozhong and Zhu, Junwei and Cai, Chengfei and Tang, Shiyu and Lin, Qin and Li, Xiu and Lu, Qinglin},
  title     = {{HunyuanPortrait}: Implicit Condition Control for Enhanced Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15909--15919},
}
Flexible Group Count Enables Hassle-Free Structured Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiamu and Zhong, Shaochen and Ye, Andrew and Liu, Zirui and Zhao, Sebastian and Zhou, Kaixiong and Li, Li and Choi, Soo-Hyun and Chen, Rui and Hu, Xia and Xu, Shuai and Chaudhary, Vipin},
  title     = {Flexible Group Count Enables Hassle-Free Structured Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4807--4818},
}
EasyCraft: A Robust and Efficient Framework for Automatic Avatar Crafting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Suzhen and Chen, Weijie and Zhang, Wei and Zhao, Minda and Li, Lincheng and Zhang, Rongsheng and Hu, Zhipeng and Yu, Xin},
  title     = {{EasyCraft}: A Robust and Efficient Framework for Automatic Avatar Crafting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5581--5591},
}
MeshArt: Generating Articulated Meshes with Structure-Guided Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Daoyi and Siddiqui, Yawar and Li, Lei and Dai, Angela},
  title     = {{MeshArt}: Generating Articulated Meshes with Structure-Guided Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {618--627},
}
Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Wang and Wang, QingSong and Feng, Yueying and Wang, Shulei and Jin, Tao and Zhao, Zhou and Wu, Fei and Yao, Chang and Chen, Jingyuan},
  title     = {Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29756--29766},
}
Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Kaihang and Lin, Wang and Yue, Zhongqi and Ao, Tenglong and Jia, Liyu and Zhao, Wei and Li, Juncheng and Tang, Siliang and Zhang, Hanwang},
  title     = {Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26136--26146},
}
SplatFlow: Self-Supervised Dynamic Gaussian Splatting in Neural Motion Flow Field for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Sun, Zhuoyang and Chen, Yingjie Victor and Chen, Mei},
  title     = {{SplatFlow}: Self-Supervised Dynamic {Gaussian} Splatting in Neural Motion Flow Field for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27487--27496},
}
Zero-shot 3D Question Answering via Voxel-based Dynamic Token Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Hsiang-Wei and Chen, Fu-Chen and Chai, Wenhao and Su, Che-Chun and Xia, Lu and Jung, Sanghun and Yang, Cheng-Yen and Hwang, Jenq-Neng and Sun, Min and Kuo, Cheng-Hao},
  title     = {Zero-shot {3D} Question Answering via Voxel-based Dynamic Token Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19424--19434},
}
AesthetiQ: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patnaik_2025_CVPR,
  author    = {Patnaik, Sohan and Jain, Rishabh and Krishnamurthy, Balaji and Sarkar, Mausoom},
  title     = {{AesthetiQ}: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23701--23711},
}
Enhanced then Progressive Fusion with View Graph for Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zhibin and Liu, Meng and Wang, Siwei and Liang, Ke and Zhang, Yi and Liu, Suyuan and Jin, Jiaqi and Liu, Xinwang and Zhu, En},
  title     = {Enhanced then Progressive Fusion with View Graph for Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15518--15527},
}
FINECAPTION: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2025_CVPR,
  author    = {Hua, Hang and Liu, Qing and Zhang, Lingzhi and Shi, Jing and Kim, Soo Ye and Zhang, Zhifei and Wang, Yilin and Zhang, Jianming and Lin, Zhe and Luo, Jiebo},
  title     = {{FINECAPTION}: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24763--24773},
}
Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Myunsoo and Ki, Donghyeon and Shim, Seong-Woong and Lee, Byung-Jun},
  title     = {Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2513--2522},
}
Chebyshev Attention Depth Permutation Texture Network with Latent Texture Attribute Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Evani_2025_CVPR,
  author    = {Evani, Ravishankar and Rajan, Deepu and Mao, Shangbo},
  title     = {{Chebyshev} Attention Depth Permutation Texture Network with Latent Texture Attribute Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23423--23432},
}
Explainable Saliency: Articulating Reasoning with Contextual Prioritization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Nuo and Jiang, Ming and Zhao, Qi},
  title     = {Explainable Saliency: Articulating Reasoning with Contextual Prioritization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9601--9610},
}
Decentralized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{McAllister_2025_CVPR,
  author    = {McAllister, David and Tancik, Matthew and Song, Jiaming and Kanazawa, Angjoo},
  title     = {Decentralized Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23323--23333},
}
AnyEdit: Mastering Unified High-Quality Image Editing for Any Idea-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Qifan and Chow, Wei and Yue, Zhongqi and Pan, Kaihang and Wu, Yang and Wan, Xiaoyang and Li, Juncheng and Tang, Siliang and Zhang, Hanwang and Zhuang, Yueting}, title = {AnyEdit: Mastering Unified High-Quality Image Editing for Any Idea}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26125-26135} }
Compass Control: Multi Object Orientation Control for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2025_CVPR, author = {Parihar, Rishubh and Agrawal, Vaibhav and VS, Sachidanand and Radhakrishnan, Venkatesh Babu}, title = {Compass Control: Multi Object Orientation Control for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2791-2801} }
Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beizaee_2025_CVPR, author = {Beizaee, Farzad and Lodygensky, Gregory A. and Desrosiers, Christian and Dolz, Jose}, title = {Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19088-19097} }
Continuous 3D Perception Model with Persistent State-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qianqian and Zhang, Yifei and Holynski, Aleksander and Efros, Alexei A. and Kanazawa, Angjoo}, title = {Continuous 3D Perception Model with Persistent State}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10510-10522} }
LP-Diff: Towards Improved Restoration of Real-World Degraded License Plate-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Haoyan and Zhang, Zhenrong and Feng, Yuzheng and Nguyen, Anh and Liu, Hongbin}, title = {LP-Diff: Towards Improved Restoration of Real-World Degraded License Plate}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17831-17840} }
Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yiheng and Yang, Yang and Tan, Zichang and Liu, Huan and Chen, Weihua and Zhou, Xu and Lei, Zhen}, title = {Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9242-9252} }
DNF: Unconditional 4D Generation with Dictionary-based Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xinyi and Li, Naiqi and Dai, Angela}, title = {DNF: Unconditional 4D Generation with Dictionary-based Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26047-26056} }
ARM: Appearance Reconstruction Model for Relightable 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Xiang and Yu, Chang and Bi, Zoubin and Shang, Yintong and Gao, Feng and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin}, title = {ARM: Appearance Reconstruction Model for Relightable 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21425-21437} }
VideoGEM: Training-free Action Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vogel_2025_CVPR, author = {Vogel, Felix and Bousselham, Walid and Kukleva, Anna and Shvetsova, Nina and Kuehne, Hilde}, title = {VideoGEM: Training-free Action Grounding in Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3374-3383} }
FilmComposer: LLM-Driven Music Production for Silent Film Clips-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Zhifeng and He, Qile and Zhu, Youjia and He, Qiwei and Li, Mengtian}, title = {FilmComposer: LLM-Driven Music Production for Silent Film Clips}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13519-13528} }
Ground-V: Teaching VLMs to Ground Complex Instructions in Pixels-
[pdf]
[supp]
[bibtex]@InProceedings{Zong_2025_CVPR, author = {Zong, Yongshuo and Zhang, Qin and An, Dongsheng and Li, Zhihua and Xu, Xiang and Xu, Linghan and Tu, Zhuowen and Xing, Yifan and Dabeer, Onkar}, title = {Ground-V: Teaching VLMs to Ground Complex Instructions in Pixels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24635-24645} }
Structure-from-Motion with a Non-Parametric Camera Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yihan and Pan, Linfei and Pollefeys, Marc and Larsson, Viktor}, title = {Structure-from-Motion with a Non-Parametric Camera Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1040-1049} }
EventPSR: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Bohan and Han, Jin and Shi, Boxin and Sato, Imari}, title = {EventPSR: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11427-11436} }
LAL: Enhancing 3D Human Motion Prediction with Latency-aware Auxiliary Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Xiaoning and Wei, Dong and Sun, Huaijiang and Hu, Shengxiang}, title = {LAL: Enhancing 3D Human Motion Prediction with Latency-aware Auxiliary Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7105-7114} }
CASP: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video-
[pdf]
[bibtex]@InProceedings{Wan_2025_CVPR, author = {Wan, Zhaolin and Qin, Han and Li, Zhiyang and Fan, Xiaopeng and Zuo, Wangmeng and Zhao, Debin}, title = {CASP: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12605-12614} }
TreeMeshGPT: Artistic Mesh Generation with Autoregressive Tree Sequencing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lionar_2025_CVPR, author = {Lionar, Stefan and Liang, Jiabin and Lee, Gim Hee}, title = {TreeMeshGPT: Artistic Mesh Generation with Autoregressive Tree Sequencing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26608-26617} }
RefPose: Leveraging Reference Geometric Correspondences for Accurate 6D Pose Estimation of Unseen Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jaeguk and Park, Jaewoo and Lee, Keuntek and Cho, Nam Ik}, title = {RefPose: Leveraging Reference Geometric Correspondences for Accurate 6D Pose Estimation of Unseen Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6447-6456} }
Relation3D: Enhancing Relation Modeling for Point Cloud Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Jiahao and Deng, Jiacheng}, title = {Relation3D: Enhancing Relation Modeling for Point Cloud Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8889-8899} }
Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Ting-Hsuan and Zhou, Yi and Shen, Yu and Huang, Chun-Hao Paul and Mitra, Saayan and Huang, Jia-Bin and Bhattacharya, Uttaran}, title = {Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1917-1928} }
Generating 3D-Consistent Videos from Unposed Internet Photos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chou_2025_CVPR, author = {Chou, Gene and Zhang, Kai and Bi, Sai and Tan, Hao and Xu, Zexiang and Luan, Fujun and Hariharan, Bharath and Snavely, Noah}, title = {Generating 3D-Consistent Videos from Unposed Internet Photos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27934-27945} }
Gazing at Rewards: Eye Movements as a Lens into Human and AI Decision-Making in Hybrid Visual Foraging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Bo and Tan, Dingwei and Kuo, Yen-Ling and Sun, Zhaowei and Wolfe, Jeremy M. and Cham, Tat-Jen and Zhang, Mengmi}, title = {Gazing at Rewards: Eye Movements as a Lens into Human and AI Decision-Making in Hybrid Visual Foraging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14810-14823} }
FOCUS: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zhengrui and Xiong, Conghao and Ma, Jiabo and Sun, Qichen and Feng, Lishuang and Wang, Jinzhuo and Chen, Hao}, title = {FOCUS: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15590-15600} }
Beyond Human Perception: Understanding Multi-Object World from Monocular View-
[pdf]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Keyu and Huang, Yongle and Sun, Shijie and Song, Xiangyu and Feng, Mingtao and Liu, Zedong and Song, Huansheng and Wang, Tiantian and Li, Jianxin and Akhtar, Naveed and Mian, Ajmal Saeed}, title = {Beyond Human Perception: Understanding Multi-Object World from Monocular View}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3751-3760} }
GRAE-3DMOT: Geometry Relation-Aware Encoder for Online 3D Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Hyunseop and Lee, Hyo-Jun and Lee, Yonguk and Lee, Jinu and Kim, Hanul and Koh, Yeong Jun}, title = {GRAE-3DMOT: Geometry Relation-Aware Encoder for Online 3D Multi-Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11697-11706} }
Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Xiaoyi and Aponte, David and Banbury, Colby and Robinson, Daniel P. and Ding, Tianyu and Koishida, Kazuhito and Zharkov, Ilya and Chen, Tianyi}, title = {Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15234-15244} }
Parameter Efficient Mamba Tuning via Projector-targeted Diagonal-centric Linear Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2025_CVPR, author = {Ham, Seokil and Kim, Hee-Seon and Woo, Sangmin and Kim, Changick}, title = {Parameter Efficient Mamba Tuning via Projector-targeted Diagonal-centric Linear Transformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30106-30115} }
ViUniT: Visual Unit Tests for More Robust Visual Programming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Panagopoulou_2025_CVPR, author = {Panagopoulou, Artemis and Zhou, Honglu and Savarese, Silvio and Xiong, Caiming and Callison-Burch, Chris and Yatskar, Mark and Niebles, Juan Carlos}, title = {ViUniT: Visual Unit Tests for More Robust Visual Programming}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24646-24656} }
LIRM: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhengqin and Wang, Dilin and Chen, Ka and Lv, Zhaoyang and Nguyen-Phuoc, Thu and Lee, Milim and Huang, Jia-Bin and Xiao, Lei and Zhu, Yufeng and Marshall, Carl S. and Ren, Yuheng and Newcombe, Richard and Dong, Zhao}, title = {LIRM: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {505-517} }
DualTalk: Dual-Speaker Interaction for 3D Talking Head Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Ziqiao and Fan, Yanbo and Wu, Haoyu and Wang, Xuan and Liu, Hongyan and He, Jun and Fan, Zhaoxin}, title = {DualTalk: Dual-Speaker Interaction for 3D Talking Head Conversations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21055-21064} }
MAR-3D: Progressive Masked Auto-regressor for High-Resolution 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jinnan and Zhu, Lingting and Hu, Zeyu and Qian, Shengju and Chen, Yugang and Wang, Xin and Lee, Gim Hee}, title = {MAR-3D: Progressive Masked Auto-regressor for High-Resolution 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11083-11092} }
beta-FFT: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Ming and Yin, Jianfu and Ma, Zhuangzhuang and Ma, Jianheng and Zhu, Feiyu and Wu, Bingbing and Wen, Ya and Wu, Meng and Hu, Cong and Hu, Bingliang and Wang, Quan}, title = {beta-FFT: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30839-30849} }
Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics-
[pdf]
[supp]
[bibtex]@InProceedings{Smadar_2025_CVPR, author = {Smadar, Yair and Hoogi, Assaf}, title = {Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30167-30177} }
Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Jinho and Han, Sangmin and Kim, Jinwoo and Kim, Seon Joo}, title = {Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2355-2365} }
SynerGen-VL: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hao and Tian, Changyao and Shao, Jie and Zhu, Xizhou and Wang, Zhaokai and Zhu, Jinguo and Dou, Wenhan and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng}, title = {SynerGen-VL: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29767-29779} }
Synthetic Prior for Few-Shot Drivable Head Avatar Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zielonka_2025_CVPR, author = {Zielonka, Wojciech and Garbin, Stephan J. and Lattas, Alexandros and Kopanas, George and Gotardo, Paulo and Beeler, Thabo and Thies, Justus and Bolkart, Timo}, title = {Synthetic Prior for Few-Shot Drivable Head Avatar Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10735-10746} }
Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Janny_2025_CVPR, author = {Janny, Steeven and Poirier, Herv{\'e} and Antsfeld, Leonid and Bono, Guillaume and Monaci, Gianluca and Chidlovskii, Boris and Giuliari, Francesco and Del Bue, Alessio and Wolf, Christian}, title = {Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12111-12121} }
Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Huakai and Xiong, Guoxin and Mai, Huayu and Liu, Xiang and Zhang, Tianzhu}, title = {Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9231-9241} }
DFormerv2: Geometry Self-Attention for RGBD Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Bo-Wen and Cao, Jiao-Long and Cheng, Ming-Ming and Hou, Qibin}, title = {DFormerv2: Geometry Self-Attention for RGBD Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19345-19355} }
Scaling Vision Pre-Training to 4K Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Baifeng and Li, Boyi and Cai, Han and Lu, Yao and Liu, Sifei and Pavone, Marco and Kautz, Jan and Han, Song and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu}, title = {Scaling Vision Pre-Training to 4K Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9631-9640} }
GarmentPile: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Ruihai and Zhu, Ziyu and Wang, Yuran and Chen, Yue and Wang, Jiarui and Dong, Hao}, title = {GarmentPile: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6950-6959} }
Uncertain Multimodal Intention and Emotion Understanding in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Qu and Shi, Qinghongya and Wang, Tongxin and Ye, Mang}, title = {Uncertain Multimodal Intention and Emotion Understanding in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24700-24709} }
GroomLight: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Yang and Chai, Menglei and Vicini, Delio and Zhou, Yuxiao and Xu, Yinghao and Guibas, Leonidas and Wetzstein, Gordon and Beeler, Thabo}, title = {GroomLight: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16040-16050} }
Improving Editability in Image Generation with Layer-wise Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Daneul and Lee, Jaeah and Park, Jaesik}, title = {Improving Editability in Image Generation with Layer-wise Memory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7889-7898} }
Sea-ing in Low-light-
[pdf]
[supp]
[bibtex]@InProceedings{Varghese_2025_CVPR, author = {Varghese, Nisha and Rajagopalan, A. N.}, title = {Sea-ing in Low-light}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16629-16640} }
VidTwin: Video VAE with Decoupled Structure and Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuchi and Guo, Junliang and Xie, Xinyi and He, Tianyu and Sun, Xu and Bian, Jiang}, title = {VidTwin: Video VAE with Decoupled Structure and Dynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22922-22932} }
CL-LoRA: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Jiangpeng and Duan, Zhihao and Zhu, Fengqing}, title = {CL-LoRA: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30534-30544} }
Generative Modeling of Class Probability for Multi-Modal Representation Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR, author = {Shin, JungKyoo and Kim, Bumsoo and Kim, Eunwoo}, title = {Generative Modeling of Class Probability for Multi-Modal Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20737-20746} }
VisionZip: Longer is Better but Not Necessary in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Senqiao and Chen, Yukang and Tian, Zhuotao and Wang, Chengyao and Li, Jingyao and Yu, Bei and Jia, Jiaya}, title = {VisionZip: Longer is Better but Not Necessary in Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19792-19802} }
Simplification Is All You Need against Out-of-Distribution Overconfidence-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Keke and Hou, Chao and Peng, Weilong and Fang, Xiang and Wu, Zhize and Nie, Yongwei and Wang, Wenping and Tian, Zhihong}, title = {Simplification Is All You Need against Out-of-Distribution Overconfidence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5030-5040} }
SpatialDreamer: Self-supervised Stereo Video Synthesis from Monocular Input-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Zhen and Long, Yangqi and Huang, Congzhentao and Li, Cao and Lv, Chengfei and Ren, Hao and Zheng, Dian}, title = {SpatialDreamer: Self-supervised Stereo Video Synthesis from Monocular Input}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {811-821} }
LOD-GS: Achieving Levels of Detail using Scalable Gaussian Soup-
[pdf]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Jianxiong and Qian, Yue and Zhan, Xiaohang}, title = {LOD-GS: Achieving Levels of Detail using Scalable Gaussian Soup}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {671-680} }
BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yunqi and Huang, Ian and Je, Jihyeon and Yang, Guandao and Guibas, Leonidas}, title = {BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18574-18583} }
VoteFlow: Enforcing Local Rigidity in Self-Supervised Scene Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yancong and Wang, Shiming and Nan, Liangliang and Kooij, Julian and Caesar, Holger}, title = {VoteFlow: Enforcing Local Rigidity in Self-Supervised Scene Flow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17155-17164} }
The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhan and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4618-4627} }
Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Feng and Liu, Ruiyang and Liu, Chen and He, Gaofeng and Li, Yong-Lu and Jin, Xiaogang and Wang, Huamin}, title = {Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23712-23722} }
Uncertainty Weighted Gradients for Model Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jinxu and Tao, Linwei and Dong, Minjing and Xu, Chang}, title = {Uncertainty Weighted Gradients for Model Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15497-15507} }
Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_CVPR, author = {Kwon, Joohyun and Cho, Hanbyel and Kim, Junmo}, title = {Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26855-26865} }
Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Zhengyi and Bao, Weidong and Wang, Ji and Zhang, Shuai and Zhou, Jingxuan and Lyu, Lingjuan and Lim, Wei Yang Bryan}, title = {Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30661-30670} }
SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kai and Zhao, Xiaodong and Huang, Yujie and Fang, Guoyu and Song, Xiao and Wang, Ruiping and Wang, Ziyuan}, title = {SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22465-22475} }
FFaceNeRF: Few-shot Face Editing in Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Kwan and Kim, Chaelin and Shin, Hangyeul and Noh, Junyong}, title = {FFaceNeRF: Few-shot Face Editing in Neural Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10825-10835} }
Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Shengeng and He, Jiayi and Cheng, Lechao and Wu, Jingjing and Guo, Dan and Hong, Richang}, title = {Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3481-3491} }
HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving-
[pdf]
[supp]
[bibtex]@InProceedings{Raswa_2025_CVPR, author = {Raswa, Farchan Hakim and Lu, Chun-Shien and Wang, Jia-Ching}, title = {HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30251-30260} }
Unified Medical Lesion Segmentation via Self-referring Indicator-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Shijie and Zhao, Xiaoqi and Zhang, Lihe and Wang, Tiancheng}, title = {Unified Medical Lesion Segmentation via Self-referring Indicator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10414-10424} }
SGSST: Scaling Gaussian Splatting Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Galerne_2025_CVPR, author = {Galerne, Bruno and Wang, Jianling and Raad, Lara and Morel, Jean-Michel}, title = {SGSST: Scaling Gaussian Splatting Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26535-26544} }
Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noda_2025_CVPR, author = {Noda, Takeshi and Chen, Chao and Zhou, Junsheng and Zhang, Weiqi and Liu, Yu-Shen and Han, Zhizhong}, title = {Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22139-22149} }
Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Fangyun and Zhao, Jinjing and Yan, Kun and Xu, Chang}, title = {Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19304-19314} }
Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Haoran and Barnes, Connelly and Zhou, Yuqian and Kang, Yan and Du, Zhenbang and Zhou, Wei and Zhang, Lingzhi and Nitzan, Yotam and Liu, Xiaoyang and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Lin, Yingyan Celine}, title = {Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18072-18082} }
Zero-shot RGB-D Point Cloud Registration with Pre-trained Large Vision Model-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin}, title = {Zero-shot {RGB-D} Point Cloud Registration with Pre-trained Large Vision Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16943--16952} }
Balancing Two Classifiers via A Simplex ETF Structure for Model Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Jiani and Zhao, He and Gao, Jintong and Guo, Dandan and Zha, Hongyuan}, title = {Balancing Two Classifiers via A Simplex {ETF} Structure for Model Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30712--30721} }
DistinctAD: Distinctive Audio Description Generation in Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Bo and Wu, Wenhao and Wu, Qiangqiang and Song, Yuxin and Chan, Antoni B.}, title = {{DistinctAD}: Distinctive Audio Description Generation in Contexts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13571--13581} }
DAMM-Diffusion: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Wang, Shouju and Tang, Yuxia and Zhu, Qi and Zhang, Daoqiang and Shao, Wei}, title = {{DAMM-Diffusion}: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30886--30895} }
Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jingwei and Jalali, Mohammad and Li, Cheuk Ting and Farnia, Farzan}, title = {Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8269--8278} }
CL-MoE: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Huai_2025_CVPR, author = {Huai, Tianyu and Zhou, Jie and Wu, Xingjiao and Chen, Qin and Bai, Qingchun and Zhou, Ze and He, Liang}, title = {{CL-MoE}: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19608--19617} }
Semantic Library Adaptation: LoRA Retrieval and Fusion for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qorbani_2025_CVPR, author = {Qorbani, Reza and Villani, Gianluca and Panagiotakopoulos, Theodoros and Colomer, Marc Botet and H{\"a}renstam-Nielsen, Linus and Segu, Mattia and Dovesi, Pier Luigi and Karlgren, Jussi and Cremers, Daniel and Tombari, Federico and Poggi, Matteo}, title = {Semantic Library Adaptation: {LoRA} Retrieval and Fusion for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9804--9815} }
PhyS-EdiT: Physics-aware Semantic Image Editing with Text Description-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Ziqi and Weng, Shuchen and Xia, Yifei and Shi, Boxin}, title = {{PhyS-EdiT}: Physics-aware Semantic Image Editing with Text Description}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7867--7876} }
U-Know-DiffPAN: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for PAN-Sharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sungpyo and Do, Jeonghyeok and Lee, Jaehyup and Kim, Munchurl}, title = {{U-Know-DiffPAN}: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for {PAN}-Sharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23069--23079} }
SceneDiffuser++: City-Scale Traffic Simulation via a Generative World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Shuhan and Lambert, John and Jeon, Hong and Kulshrestha, Sakshum and Bai, Yijing and Luo, Jing and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max}, title = {{SceneDiffuser++}: City-Scale Traffic Simulation via a Generative World Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1570--1580} }
Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Pietrantoni_2025_CVPR, author = {Pietrantoni, Maxime and Csurka, Gabriela and Sattler, Torsten}, title = {{Gaussian} Splatting Feature Fields for (Privacy-Preserving) Visual Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1082--1092} }
Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Boqian and Yang, Shen and Chen, Hao and Yang, Chao and Jia, Jing and Jiang, Guang}, title = {Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16987--16996} }
Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazzamuto_2025_CVPR, author = {Mazzamuto, Michele and Furnari, Antonino and Sato, Yoichi and Farinella, Giovanni Maria}, title = {Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8310--8320} }
Trajectory Mamba: Efficient Attention-Mamba Forecasting Model Based on Selective SSM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yizhou and Cheng, Yihua and Wang, Kezhi}, title = {Trajectory {Mamba}: Efficient Attention-{Mamba} Forecasting Model Based on Selective {SSM}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12058--12067} }
Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Hao and Tan, Zichang and Li, Jun and Liu, Ajian and Wan, Jun and Lei, Zhen}, title = {Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4650--4660} }
RelationField: Relate Anything in Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koch_2025_CVPR, author = {Koch, Sebastian and Wald, Johanna and Colosi, Mirco and Vaskevicius, Narunas and Hermosilla, Pedro and Tombari, Federico and Ropinski, Timo}, title = {{RelationField}: Relate Anything in Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21706--21716} }
DyFo: A Training-Free Dynamic Focus Visual Search for Enhancing LMMs in Fine-Grained Visual Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Geng and Xu, Jinglin and Zhao, Yunzhen and Peng, Yuxin}, title = {{DyFo}: A Training-Free Dynamic Focus Visual Search for Enhancing {LMMs} in Fine-Grained Visual Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9098--9108} }
From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Mingyang and Qu, Xiaoye and Zhou, Jiawei and Cheng, Yu}, title = {From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9434--9444} }
Let Humanoids Hike! Integrative Skill Development on Complex Trails-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kwan-Yee and Yu, Stella X.}, title = {Let Humanoids Hike! Integrative Skill Development on Complex Trails}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22498--22507} }
VLOGGER: Multimodal Diffusion for Embodied Avatar Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Corona_2025_CVPR, author = {Corona, Enric and Zanfir, Andrei and Bazavan, Eduard Gabriel and Kolotouros, Nikos and Alldieck, Thiemo and Sminchisescu, Cristian}, title = {{VLOGGER}: Multimodal Diffusion for Embodied Avatar Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15896--15908} }
DEIM: DETR with Improved Matching for Fast Convergence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shihua and Lu, Zhichao and Cun, Xiaodong and Yu, Yongjun and Zhou, Xiao and Shen, Xi}, title = {{DEIM}: {DETR} with Improved Matching for Fast Convergence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15162--15171} }
BF-STVSR: B-Splines and Fourier---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Eunjin and Kim, Hyeonjin and Jin, Kyong Hwan and Yoo, Jaejun}, title = {{BF-STVSR}: {B-Splines} and {Fourier}---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28009--28018} }
DIO: Decomposable Implicit 4D Occupancy-Flow World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Diehl_2025_CVPR, author = {Diehl, Christopher and Sykora, Quinlan and Agro, Ben and Gilles, Thomas and Casas, Sergio and Urtasun, Raquel}, title = {{DIO}: Decomposable Implicit {4D} Occupancy-Flow World Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27456--27466} }
SLADE: Shielding against Dual Exploits in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hossain_2025_CVPR, author = {Hossain, Md Zarif and Imteaj, Ahmed}, title = {{SLADE}: Shielding against Dual Exploits in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24244--24254} }
Human Motion Instruction Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lei and Jia, Sen and Wang, Jianhao and Jiang, Zhongyu and Zhou, Feng and Dai, Ju and Zhang, Tianfang and Wu, Zongkai and Hwang, Jenq-Neng}, title = {Human Motion Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17582--17591} }
A Flag Decomposition for Hierarchical Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mankovich_2025_CVPR, author = {Mankovich, Nathan and Santamaria, Ignacio and Camps-Valls, Gustau and Birdal, Tolga}, title = {A Flag Decomposition for Hierarchical Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18738--18748} }
RCP-Bench: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Shihang and Qu, Sanqing and Wang, Tianhang and Zhang, Xudong and Zhu, Yunwei and Mao, Jian and Lu, Fan and Lin, Qiao and Chen, Guang}, title = {{RCP-Bench}: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11908--11918} }
Olympus: A Universal Task Router for Computer Vision Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yuanze and Li, Yunsheng and Chen, Dongdong and Xu, Weijian and Clark, Ronald and Torr, Philip}, title = {Olympus: A Universal Task Router for Computer Vision Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14235--14246} }
HERA: Hybrid Explicit Representation for Ultra-Realistic Head Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Hongrui and Xiao, Yuting and Wang, Xuan and Li, Jiafei and Guo, Yudong and Fan, Yanbo and Gao, Shenghua and Zhang, Juyong}, title = {{HERA}: Hybrid Explicit Representation for Ultra-Realistic Head Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {260--270} }
Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Smeu_2025_CVPR, author = {Smeu, Stefan and Boldisor, Dragos-Alexandru and Oneata, Dan and Oneata, Elisabeta}, title = {Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18815--18825} }
CoE: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Wenlong and Wang, Qilong and Liu, Chuang and Li, Dong and Hu, Qinghua}, title = {{CoE}: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4364--4374} }
Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jian and Dabral, Rishabh and Luvizon, Diogo and Cao, Zhe and Liu, Lingjie and Beeler, Thabo and Theobalt, Christian}, title = {{Ego4o}: Egocentric Human Motion Capture and Understanding from Multi-Modal Input}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22668--22679} }
Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Bin and Wu, Fan and Ouyang, Linke and Gu, Zhuangcheng and Zhang, Rui and Xia, Renqiu and Shi, Botian and Zhang, Bo and He, Conghui}, title = {Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19681--19690} }
FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Jiangtong and Yu, Hu and Huang, Jie and Xiao, Jie and Zhao, Feng}, title = {{FreePCA}: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27979--27988} }
Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Zhi and Hu, Jingbo and Zhang, Ling and Fu, Gang and Xiao, Chunxia}, title = {Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2408--2417} }
Improving Semi-Supervised Semantic Segmentation with Sliced-Wasserstein Feature Alignment and Uniformity-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Chen-Yi and Derakhshandeh, Kasra and Chaterji, Somali}, title = {Improving Semi-Supervised Semantic Segmentation with Sliced-{Wasserstein} Feature Alignment and Uniformity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20233--20243} }
Mind the Time: Temporally-Controlled Multi-Event Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Ziyi and Siarohin, Aliaksandr and Menapace, Willi and Skorokhodov, Ivan and Fang, Yuwei and Chordia, Varnith and Gilitschenski, Igor and Tulyakov, Sergey}, title = {Mind the Time: Temporally-Controlled Multi-Event Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23989--24000} }
Learning Extremely High Density Crowds as Active Matters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Feixiang and Yue, Jiangbei and Zhu, Jialin and Seyfried, Armin and Casas, Dan and Pettr{\'e}, Julien and Wang, He}, title = {Learning Extremely High Density Crowds as Active Matters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {540--550} }
Audio-Visual Semantic Graph Network for Audio-Visual Event Localization-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Liang and Li, Shuaiyong and Zhu, Yongqiang}, title = {Audio-Visual Semantic Graph Network for Audio-Visual Event Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23957--23966} }
3D-Mem: 3D Scene Memory for Embodied Exploration and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuncong and Yang, Han and Zhou, Jiachen and Chen, Peihao and Zhang, Hongxin and Du, Yilun and Gan, Chuang}, title = {{3D-Mem}: {3D} Scene Memory for Embodied Exploration and Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17294--17303} }
EchoMimicV2: Towards Striking, Simplified, and Semi-Body Human Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Rang and Zhang, Xingyu and Li, Yuming and Ma, Chenguang}, title = {{EchoMimicV2}: Towards Striking, Simplified, and Semi-Body Human Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5489--5498} }
Navigation World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bar_2025_CVPR, author = {Bar, Amir and Zhou, Gaoyue and Tran, Danny and Darrell, Trevor and LeCun, Yann}, title = {Navigation World Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15791--15801} }
Video Motion Transfer with Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pondaven_2025_CVPR, author = {Pondaven, Alexander and Siarohin, Aliaksandr and Tulyakov, Sergey and Torr, Philip and Pizzati, Fabio}, title = {Video Motion Transfer with Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22911--22921} }
Gaussian Splatting for Efficient Satellite Image Photogrammetry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aira_2025_CVPR, author = {Aira, Luca Savant and Facciolo, Gabriele and Ehret, Thibaud}, title = {{Gaussian} Splatting for Efficient Satellite Image Photogrammetry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5959--5969} }
Unified Reconstruction of Static and Dynamic Scenes from Events-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Qiyao and Duan, Peiqi and Lou, Hanyue and Teng, Minggui and Cai, Ziqi and Chen, Xu and Shi, Boxin}, title = {Unified Reconstruction of Static and Dynamic Scenes from Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27914--27923} }
Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Zhuoran and You, Shaodi and Cheng, Cheng and Wei, Shikui}, title = {Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28081--28090} }
Conformal Prediction and MLLM aided Uncertainty Quantification in Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nag_2025_CVPR, author = {Nag, Sayak and Ghosh, Udita and Ta, Calvin-Khang and Bose, Sarosij and Li, Jiachen and Roy-Chowdhury, Amit K.}, title = {Conformal Prediction and {MLLM} aided Uncertainty Quantification in Scene Graph Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11676--11686} }
Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Wei and Zhao, Chenyang and Chan, Antoni B.}, title = {Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29363--29373} }
Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Lu, Dongyue and Chen, Jinnan and Wang, Yangang and Lee, Gim Hee}, title = {Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17475--17485} }
Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Guotao and Zhang, Baoquan and Wen, Zhiyuan and Zhao, Junteng and Ye, Yunming and Ye, Kola and He, Yao}, title = {Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4060--4069} }
Parallel Sequence Modeling via Generalized Spatial Propagation Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hongjun and Byeon, Wonmin and Xu, Jiarui and Gu, Jinwei and Cheung, Ka Chun and Wang, Xiaolong and Han, Kai and Kautz, Jan and Liu, Sifei}, title = {Parallel Sequence Modeling via Generalized Spatial Propagation Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4473--4483} }
Scenario Dreamer: Vectorized Latent Diffusion for Generating Driving Simulation Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rowe_2025_CVPR, author = {Rowe, Luke and Girgis, Roger and Gosselin, Anthony and Paull, Liam and Pal, Christopher and Heide, Felix}, title = {Scenario Dreamer: Vectorized Latent Diffusion for Generating Driving Simulation Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17207--17218} }
Poly-Autoregressive Prediction for Modeling Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Thakkar_2025_CVPR, author = {Thakkar, Neerja and Sadjadpour, Tara and Rajasegeran, Jathushan and Ginosar, Shiry and Malik, Jitendra}, title = {Poly-Autoregressive Prediction for Modeling Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12402--12412} }
NADER: Neural Architecture Design via Multi-Agent Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zekang and Zeng, Wang and Jin, Sheng and Qian, Chen and Luo, Ping and Liu, Wentao}, title = {{NADER}: Neural Architecture Design via Multi-Agent Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4452--4461} }
Move-in-2D: 2D-Conditioned Human Motion Generation-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Hsin-Ping and Zhou, Yang and Wang, Jui-Hsien and Liu, Difan and Liu, Feng and Yang, Ming-Hsuan and Xu, Zhan}, title = {{Move-in-2D}: {2D}-Conditioned Human Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22766--22775} }
PoseBH: Prototypical Multi-Dataset Training Beyond Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Uyoung and Freer, Jonathan and Baek, Seungryul and Chang, Hyung Jin and Kim, Kwang In}, title = {{PoseBH}: Prototypical Multi-Dataset Training Beyond Human Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12278--12288} }
MATCHA: Towards Matching Anything-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Fei and Elflein, Sven and Leal-Taix{\'e}, Laura and Zhou, Qunjie}, title = {{MATCHA}: Towards Matching Anything}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27081--27091} }
CTRL-D: Controllable Dynamic 3D Scene Editing with Personalized 2D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Kai and Wu, Chin-Hsuan and Gilitschenski, Igor}, title = {{CTRL-D}: Controllable Dynamic {3D} Scene Editing with Personalized {2D} Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26630--26640} }
Separation of Powers: On Segregating Knowledge from Observation in LLM-enabled Knowledge-based Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhen and Tao, Zhuo and Chen, Qi and Li, Liang and Qi, Yuankai and van den Hengel, Anton and Huang, Qingming}, title = {Separation of Powers: On Segregating Knowledge from Observation in {LLM}-enabled Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24753--24762} }
Decision SpikeFormer: Spike-Driven Transformer for Decision Making-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Wei and Gu, Qinying and Ye, Nanyang}, title = {Decision {SpikeFormer}: Spike-Driven Transformer for Decision Making}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19241--19250} }
SF2T: Self-supervised Fragment Finetuning of Video-LLMs for Fine-Grained Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Yangliu and Song, Zikai and Feng, Na and Luo, Yawei and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei}, title = {{SF2T}: Self-supervised Fragment Finetuning of {Video-LLMs} for Fine-Grained Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29108--29117} }
Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Quanjiang and Luo, Tingjin and Liao, Jiahui}, title = {Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20706--20715} }
Fitted Neural Lossless Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhe and Chen, Zhenzhong and Liu, Shan}, title = {Fitted Neural Lossless Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23249--23258} }
Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR, author = {Kumar, K Naveen and Jha, Ranjeet Ranjan and Mohan, C Krishna and Tallamraju, Ravindra Babu}, title = {Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4999--5009} }
EMOE: Modality-Specific Enhanced Dynamic Emotion Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Yiyang and Huang, Wenke and Wan, Guancheng and Su, Kehua and Ye, Mang}, title = {{EMOE}: Modality-Specific Enhanced Dynamic Emotion Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14314--14324} }
JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yunlong and Lin, Zixu and Chen, Haoyu and Pan, Panwang and Li, Chenxin and Chen, Sixiang and Wen, Kairun and Jin, Yeying and Li, Wenbo and Ding, Xinghao}, title = {{JarvisIR}: Elevating Autonomous Driving Perception with Intelligent Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22369--22380} }
UniPre3D: Unified Pre-training of 3D Point Cloud Models with Cross-Modal Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyi and Zhang, Yanran and Zhou, Jie and Lu, Jiwen}, title = {{UniPre3D}: Unified Pre-training of {3D} Point Cloud Models with Cross-Modal {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1319--1329} }
Charm: The Missing Piece in ViT Fine-Tuning for Image Aesthetic Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behrad_2025_CVPR, author = {Behrad, Fatemeh and Tuytelaars, Tinne and Wagemans, Johan}, title = {Charm: The Missing Piece in {ViT} Fine-Tuning for Image Aesthetic Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7815--7824} }
F-LMM: Grounding Frozen Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Size and Jin, Sheng and Zhang, Wenwei and Xu, Lumin and Liu, Wentao and Li, Wei and Loy, Chen Change}, title = {{F-LMM}: Grounding Frozen Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24710--24721} }
EntityErasure: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yixing and Zhang, Qing and Wang, Yitong and Nie, Yongwei and Zheng, Wei-Shi}, title = {{EntityErasure}: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28274--28283} }
Generative Video Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Shaoteng and Wang, Tianyu and Wang, Jui-Hsien and Liu, Qing and Zhang, Zhifei and Lee, Joon-Young and Li, Yijun and Yu, Bei and Lin, Zhe and Kim, Soo Ye and Jia, Jiaya}, title = {Generative Video Propagation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17712--17722} }
From Multimodal LLMs to Generalist Embodied Agents: Methods and Lessons-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szot_2025_CVPR, author = {Szot, Andrew and Mazoure, Bogdan and Attia, Omar and Timofeev, Aleksei and Agrawal, Harsh and Hjelm, Devon and Gan, Zhe and Kira, Zsolt and Toshev, Alexander}, title = {From Multimodal {LLMs} to Generalist Embodied Agents: Methods and Lessons}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10644--10655} }
Mosaic3D: Foundation Dataset and Model for Open-Vocabulary 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Junha and Park, Chunghyun and Choe, Jaesung and Wang, Yu-Chiang Frank and Kautz, Jan and Cho, Minsu and Choy, Chris}, title = {{Mosaic3D}: Foundation Dataset and Model for Open-Vocabulary {3D} Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14089--14101} }
T-CIL: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hwang_2025_CVPR, author = {Hwang, Seong-Hyeon and Kim, Minsu and Whang, Steven Euijong}, title = {{T-CIL}: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15339--15348} }
LoRA Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xuan and Chang, Xiaobin}, title = {{LoRA} Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15308--15318} }
Joint Out-of-Distribution Filtering and Data Discovery Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schmidt_2025_CVPR, author = {Schmidt, Sebastian and Schenk, Leonard and Schwinn, Leo and G{\"u}nnemann, Stephan}, title = {Joint Out-of-Distribution Filtering and Data Discovery Active Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25677--25687} }
AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_CVPR, author = {Lyu, Jin and Zhu, Tianyi and Gu, Yi and Lin, Li and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying and An, Liang}, title = {{AniMer}: Animal Pose and Shape Estimation Using Family Aware Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17486--17496} }
Co-op: Correspondence-based Novel Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2025_CVPR, author = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook}, title = {Co-op: Correspondence-based Novel Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11622--11632} }
Finding Local Diffusion Schrodinger Bridge using Kolmogorov-Arnold Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Li, Fanding and Liang, Dong and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo}, title = {Finding Local Diffusion {Schrodinger} Bridge using {Kolmogorov-Arnold} Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23227--23236} }
CorrBEV: Multi-View 3D Object Detection by Correlation Learning with Multi-modal Prototypes-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Ziteng and Guo, Mingzhe and Fan, Heng and Zhang, Shihui and Zhang, Zhipeng}, title = {{CorrBEV}: Multi-View {3D} Object Detection by Correlation Learning with Multi-modal Prototypes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27413--27423} }
Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Zhiqiang and Wang, Zhengxue and Wang, Kun and Li, Jun and Yang, Jian}, title = {Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26943--26953} }
CATANet: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan}, title = {{CATANet}: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17902--17912} }
SeeGround: See and Ground for Zero-Shot Open-Vocabulary 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Rong and Li, Shijie and Kong, Lingdong and Yang, Xulei and Liang, Junwei}, title = {{SeeGround}: See and Ground for Zero-Shot Open-Vocabulary {3D} Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3707--3717} }
RayFlow: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Huiyang and Xia, Xin and Yang, Yuhong and Ren, Yuxi and Wang, Xing and Xiao, Xuefeng}, title = {{RayFlow}: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18113--18123} }
Linear Attention Modeling for Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Donghui and Cheng, Zhengxue and Wang, Shen and Wu, Ronghua and Hu, Hongwei and Lu, Guo and Song, Li}, title = {Linear Attention Modeling for Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7623--7632} }
Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dufour_2025_CVPR, author = {Dufour, Nicolas and Kalogeiton, Vicky and Picard, David and Landrieu, Loic}, title = {Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23016--23026} }
Asynchronous Collaborative Graph Representation for Frames and Events-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Dianze and Li, Jianing and Liu, Xu and Fan, Xiaopeng and Tian, Yonghong}, title = {Asynchronous Collaborative Graph Representation for Frames and Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1655--1666} }
Real-time High-fidelity Gaussian Human Avatars with Position-based Interpolation of Spatially Distributed MLPs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_CVPR, author = {Zhan, Youyi and Shao, Tianjia and Yang, Yin and Zhou, Kun}, title = {Real-time High-fidelity {Gaussian} Human Avatars with Position-based Interpolation of Spatially Distributed {MLPs}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26297--26307} }
ReconDreamer: Crafting World Models for Driving Scene Reconstruction via Online Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Chaojun and Zhao, Guosheng and Wang, Xiaofeng and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Liu, Chen and Chen, Yuyin and Wang, Yida and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Jia, Peng and Lang, Xianpeng and Wang, Xingang and Mei, Wenjun}, title = {{ReconDreamer}: Crafting World Models for Driving Scene Reconstruction via Online Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1559--1569} }
RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Haisheng and Song, Feixiang and Ma, Cong and Wu, Wei and Yan, Junchi}, title = {{RoboSense}: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27446--27455} }
Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Aocheng and Zimmer-Dauphinee, James R. and Kalyanam, Rajesh and Lindsay, Ian and VanValkenburgh, Parker and Wernke, Steven and Aliaga, Daniel}, title = {Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11759--11768} }
Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Peng and Bie, Yequan and Mao, Jianda and Song, Yangqiu and Wang, Yang and Chen, Hao and Chen, Kani}, title = {Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14679--14689} }
Rate-In: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zeevi_2025_CVPR, author = {Zeevi, Tal and Shwartz-Ziv, Ravid and LeCun, Yann and Staib, Lawrence H. and Onofrey, John A.}, title = {{Rate-In}: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20757--20766} }
Thin-Shell-SfT: Fine-Grained Monocular Non-rigid 3D Surface Tracking with Neural Deformation Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Kairanda_2025_CVPR, author = {Kairanda, Navami and Habermann, Marc and Naik, Shanthika and Theobalt, Christian and Golyanik, Vladislav}, title = {{Thin-Shell-SfT}: Fine-Grained Monocular Non-rigid {3D} Surface Tracking with Neural Deformation Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11373--11383} }
DeCLIP: Decoupled Learning for Open-Vocabulary Dense Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Junjie and Chen, Bin and Li, Yulin and Kang, Bin and Chen, Yichi and Tian, Zhuotao}, title = {{DeCLIP}: Decoupled Learning for Open-Vocabulary Dense Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14824--14834} }
SocialGesture: Delving into Multi-person Gesture Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Xu and Virupaksha, Pranav and Jia, Wenqi and Lai, Bolin and Ryan, Fiona and Lee, Sangmin and Rehg, James M.}, title = {{SocialGesture}: Delving into Multi-person Gesture Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19509--19519} }
GenFusion: Closing the Loop between Reconstruction and Generation via Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Sibo and Xu, Congrong and Huang, Binbin and Geiger, Andreas and Chen, Anpei}, title = {{GenFusion}: Closing the Loop between Reconstruction and Generation via Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6078--6088} }
The PanAf-FGBG Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brookes_2025_CVPR, author = {Brookes, Otto and Kukushkin, Maksim and Mirmehdi, Majid and Stephens, Colleen and Dieguez, Paula and Hicks, Thurston C. and Jones, Sorrel and Lee, Kevin and McCarthy, Maureen S. and Meier, Amelia and Normand, Emmanuelle and Wessling, Erin G. and Wittig, Roman M. and Langergraber, Kevin and Zuberb{\"u}hler, Klaus and Boesch, Lukas and Schmid, Thomas and Arandjelovic, Mimi and K{\"u}hl, Hjalmar and Burghardt, Tilo}, title = {The {PanAf-FGBG} Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5433--5443} }
Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Hang and Chi, Changxi and Wan, Peng and Zhang, Daoqiang and Shao, Wei}, title = {Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20810--20819} }
Question-Aware Gaussian Experts for Audio-Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Hongyeob and Jung, Inyoung and Suh, Dayoon and Zhang, Youjia and Lee, Sangmin and Hong, Sungeun}, title = {Question-Aware {Gaussian} Experts for Audio-Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13681--13690} }
Sonic: Shifting Focus to Global Audio Perception in Portrait Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Xiaozhong and Hu, Xiaobin and Xu, Zhihong and Zhu, Junwei and Lin, Chuming and He, Qingdong and Zhang, Jiangning and Luo, Donghao and Chen, Yi and Lin, Qin and Lu, Qinglin and Wang, Chengjie}, title = {Sonic: Shifting Focus to Global Audio Perception in Portrait Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {193--203} }
Multitwine: Multi-Object Compositing with Text and Layout Control-
[pdf]
[supp]
[bibtex]@InProceedings{Tarres_2025_CVPR, author = {Tarr{\'e}s, Gemma Canet and Lin, Zhe and Zhang, Zhifei and Zhang, He and Gilbert, Andrew and Collomosse, John and Kim, Soo Ye}, title = {Multitwine: Multi-Object Compositing with Text and Layout Control}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8094--8104} }
DEFOM-Stereo: Depth Foundation Model Based Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Hualie and Lou, Zhiqiang and Ding, Laiyan and Xu, Rui and Tan, Minglang and Jiang, Wenjie and Huang, Rui}, title = {{DEFOM-Stereo}: Depth Foundation Model Based Stereo Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21857--21867} }
Adaptive Rectangular Convolution for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xueyang and Zheng, Zhixin and Shao, Jiandong and Duan, Yule and Deng, Liang-Jian}, title = {Adaptive Rectangular Convolution for Remote Sensing Pansharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17872--17881} }
Video Depth without Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Bingxin and Narnhofer, Dominik and Huang, Shengyu and Ke, Lei and Peters, Torben and Fragkiadaki, Katerina and Obukhov, Anton and Schindler, Konrad}, title = {Video Depth without Video Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7233--7243} }
PointLoRA: Low-Rank Adaptation with Token Selection for Point Cloud Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Song and Liu, Xiaolu and Kong, Lingdong and Xu, Jianyun and Hu, Chunyong and Fang, Gongfan and Li, Wentong and Zhu, Jianke and Wang, Xinchao}, title = {{PointLoRA}: Low-Rank Adaptation with Token Selection for Point Cloud Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6605--6615} }
HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Zedong and Xiong, Feng and Liu, Meiduo and Zhang, Jinzhi and Shao, Mingqi and Sun, Zhaoxu and Wang, Di and Xu, Mu}, title = {{HumanRig}: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {304--313} }
GaussHDR: High Dynamic Range Gaussian Splatting via Learning Unified 3D and 2D Local Tone Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jinfeng and Kong, Lingtong and Li, Bo and Xu, Dan}, title = {{GaussHDR}: High Dynamic Range {Gaussian} Splatting via Learning Unified {3D} and {2D} Local Tone Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5991--6000} }
UIBDiffusion: Universal Imperceptible Backdoor Attack for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Yuning and Zhao, Bingyin and Chu, Rui and Luo, Feng and Sikdar, Biplab and Lao, Yingjie}, title = {{UIBDiffusion}: Universal Imperceptible Backdoor Attack for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19186--19196} }
DiskVPS: Vanishing Point Detector via Hough Transform in a Disk Region-
[pdf]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jianping}, title = {{DiskVPS}: Vanishing Point Detector via {Hough} Transform in a Disk Region}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27049--27058} }
Seeing Far and Clearly: Mitigating Hallucinations in MLLMs with Attention Causal Decoding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Feilong and Liu, Chengzhi and Xu, Zhongxing and Hu, Ming and Huang, Zile and Xue, Haochen and Chen, Ziyang and Peng, Zelin and Yang, Zhiwei and Zhou, Sijin and Li, Wenxue and Li, Yulong and Song, Wenxuan and Su, Shiyan and Feng, Wei and Su, Jionglong and Lin, Mingquan and Peng, Yifan and Cheng, Xuelian and Razzak, Imran and Ge, Zongyuan}, title = {Seeing Far and Clearly: Mitigating Hallucinations in {MLLMs} with Attention Causal Decoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26147--26159} }
Towards Autonomous Micromobility through Scalable Urban Simulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Wayne and He, Honglin and Zhang, Chaoyuan and He, Jack and Zhao, Seth Z. and Gong, Ran and Li, Quanyi and Zhou, Bolei}, title = {Towards Autonomous Micromobility through Scalable Urban Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27553--27563} }
FisherTune: Fisher-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Dong and Li, Jinlong and Wang, Shuang and Wu, Mengyao and Zang, Qi and Sebe, Nicu and Zhong, Zhun}, title = {{FisherTune}: {Fisher}-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15043--15054} }
Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Na and Song, Xuemeng and Dong, Xue and Ghosh, Aashish Nikhil and Nie, Liqiang and Zimmermann, Roger}, title = {Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25708--25717} }
EdgeMovingNet: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Xie, Junyuan and Guo, Jie and Guo, Yanwen}, title = {{EdgeMovingNet}: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22150--22160} }
AdMiT: Adaptive Multi-Source Tuning in Dynamic Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Xiangyu and Niloy, Fahim Faisal and Ahmed, Sk Miraj and Krishnamurthy, Srikanth V. and Guler, Basak and Swami, Ananthram and Oymak, Samet and Roy-Chowdhury, Amit}, title = {{AdMiT}: Adaptive Multi-Source Tuning in Dynamic Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20569--20579} }
Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yanjun and Li, Zhaoyang and Chen, Honghui and Xu, Lizhi}, title = {Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19047--19056} }
Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, JunYong and Sagong, Min-cheol and Lee, SeokYeong and Jung, Seung-Won and Kim, Ig-Jae and Cho, Junghyun}, title = {Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5773--5782} }
Targeted Forgetting of Image Subgroups in CLIP Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zeliang and Liu, Gaowen and Fleming, Charles and Kompella, Ramana Rao and Xu, Chenliang}, title = {Targeted Forgetting of Image Subgroups in {CLIP} Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9870--9880} }
Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Bolin and Juefei-Xu, Felix and Liu, Miao and Dai, Xiaoliang and Mehta, Nikhil and Zhu, Chenguang and Huang, Zeyi and Rehg, James M. and Lee, Sangmin and Zhang, Ning and Xiao, Tong}, title = {Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18346--18357} }
Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maniparambil_2025_CVPR, author = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and Narayan, Sanath and Singh, Ankit and O'Connor, Noel E.}, title = {Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29847--29857} }
Feature Information Driven Position Gaussian Distribution Estimation for Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Jinghao and Feng, Mingtao and Dong, Weisheng and Wu, Fangfang and Luo, Jianqiao and Wang, Yaonan and Shi, Guangming}, title = {Feature Information Driven Position {Gaussian} Distribution Estimation for Tiny Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30376--30386} }
Enhancing Diversity for Data-free Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Kai and Zhuang, Zhihao and Zhang, Miao and Guo, Chenjuan and Shu, Yang and Yang, Bin}, title = {Enhancing Diversity for Data-free Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20969--20978} }
SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chunlin and Wang, Hanqing and Shi, Ye and Luo, Haoyang and Yang, Sibei and Yu, Jingyi and Wang, Jingya}, title = {{SeqAfford}: Sequential {3D} Affordance Reasoning via Multimodal Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1691--1701} }
DSV-LFS: Unifying LLM-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Karimi_2025_CVPR, author = {Karimi, Amin and Poullis, Charalambos}, title = {{DSV-LFS}: Unifying {LLM}-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4584--4594} }
Revisiting Generative Replay for Class Incremental Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shizhou and Lv, Xueqiang and Xing, Yinghui and Wu, Qirui and Xu, Di and Zhang, Yanning}, title = {Revisiting Generative Replay for Class Incremental Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20340--20349} }
SemAlign3D: Semantic Correspondence between RGB-Images through Aligning 3D Object-Class Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wandel_2025_CVPR, author = {Wandel, Krispin and Wang, Hesheng}, title = {{SemAlign3D}: Semantic Correspondence between {RGB}-Images through Aligning {3D} Object-Class Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1138--1147} }
Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Qiyang and Chen, Hansheng and Tomizuka, Masayoshi and Keutzer, Kurt and Wang, Qianqian and Xu, Chenfeng}, title = {Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11579--11589} }
DPU: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shawn and Gong, Huixian and Dong, Hao and Yang, Tiankai and Tu, Zhengzhong and Zhao, Yue}, title = {{DPU}: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10193--10202} }
Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peddi_2025_CVPR, author = {Peddi, Rohith and Saurabh, Saurabh and Shrivastava, Ayush Abhay and Singla, Parag and Gogate, Vibhav}, title = {Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8648--8657} }
Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Woojung and Lee, Yeonkyung and Kim, Chanyoung and Park, Kwanghyun and Hwang, Seong Jae}, title = {Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18401--18410} }
From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bouniot_2025_CVPR, author = {Bouniot, Quentin and Redko, Ievgen and Mallasto, Anton and Laclau, Charlotte and Struckmeier, Oliver and Arndt, Karol and Heinonen, Markus and Kyrki, Ville and Kaski, Samuel}, title = {From {Alexnet} to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25250--25260} }
Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images-
[pdf]
[arXiv]
[bibtex]@InProceedings{Medghalchi_2025_CVPR, author = {Medghalchi, Yasamin and Heidari, Moein and Allard, Clayton and Sigal, Leonid and Hacihaliloglu, Ilker}, title = {Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28564-28574} }
Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qiang and Song, Xiang and He, Yuhang and Han, Jizhou and Ding, Chenhao and Gao, Xinyuan and Gong, Yihong}, title = {Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4839-4849} }
COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Jinqi and Sang, Shen and Zhi, Tiancheng and Liu, Jing and Yan, Qing and Luo, Linjie and Yuan, Bo}, title = {COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30116-30126} }
Perceptual Inductive Bias Is What You Need Before Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Junru and Li, Tianqin and Jiang, Dunhan and Wu, Shenghao and Ramirez, Alan and Lee, Tai Sing}, title = {Perceptual Inductive Bias Is What You Need Before Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9621-9630} }
FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiaoqin and Ma, Xusen and Hou, Xianxu and Ding, Meidan and Li, Yudong and Chen, Junliang and Chen, Wenting and Peng, Xiaoyang and Shen, Linlin}, title = {FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9154-9164} }
EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Rahman_2025_CVPR, author = {Rahman, Md Mostafijur and Marculescu, Radu}, title = {EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10435-10444} }
Exploring Historical Information for RGBE Visual Tracking with Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Chuanyu and Zhang, Jiqing and Wang, Yang and Ge, Huilin and Xia, Qianchen and Yin, Baocai and Yang, Xin}, title = {Exploring Historical Information for RGBE Visual Tracking with Mamba}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6500-6509} }
Gyro-based Neural Single Image Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Heemin and Rim, Jaesung and Lee, Seungyong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Gyro-based Neural Single Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23111-23120} }
ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Zeqi and Cui, Yin and Li, Zhaoshuo and Wei, Fangyin and Ge, Yunhao and Gu, Jinwei and Liu, Ming-Yu and Davis, Abe and Ding, Yifan}, title = {ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2891-2901} }
MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zifan and Chen, Ziqing and Chen, Junyu and Wang, Jilong and Yang, Yuxin and Liu, Yunze and Liu, Xueyi and Wang, He and Yi, Li}, title = {MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17315-17325} }
Improving Sound Source Localization with Joint Slot Attention on Image and Audio-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Inho and Song, Youngkil and Park, Jicheol and Kim, Won Hwa and Kwak, Suha}, title = {Improving Sound Source Localization with Joint Slot Attention on Image and Audio}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3121-3130} }
Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Hariat_2025_CVPR, author = {Hariat, Marwane and Manzanera, Antoine and Filliat, David}, title = {Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21868-21879} }
Feature-Preserving Mesh Decimation for Normal Integration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heep_2025_CVPR, author = {Heep, Moritz and Behnke, Sven and Zell, Eduard}, title = {Feature-Preserving Mesh Decimation for Normal Integration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5783-5792} }
Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zeqing and Ma, Qingyang and Wan, Wentao and Li, Haojie and Wang, Keze and Tian, Yonghong}, title = {Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21226-21237} }
PERSE: Personalized 3D Generative Avatars from A Single Portrait-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2025_CVPR, author = {Cha, Hyunsoo and Lee, Inhee and Joo, Hanbyul}, title = {PERSE: Personalized 3D Generative Avatars from A Single Portrait}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15953-15962} }
Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuhui and Su, Yuchang and Liu, Yiming and Wang, Xiaohan and Burgess, James and Sui, Elaine and Wang, Chenyu and Aklilu, Josiah and Lozano, Alejandro and Wei, Anjiang and Schmidt, Ludwig and Yeung-Levy, Serena}, title = {Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29580-29590} }
DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Zhixuan and Mu, Yao and Wang, Yixiao and Chen, Tianxing and Shao, Wenqi and Zhan, Wei and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu}, title = {DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1745-1755} }
VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2025_CVPR, author = {Cha, SeungJu and Lee, Kwanyoung and Kim, Ye-Chan and Oh, Hyunwoo and Kim, Dong-Jin}, title = {VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8041-8050} }
ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence-
[pdf]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yuan and Li, Yongxiang and Ren, Zhenwen and Duan, Guiduo and Peng, Dezhong and Hu, Peng}, title = {ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30732-30741} }
Towards In-the-wild 3D Plane Reconstruction from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiachen and Yu, Rui and Chen, Sili and Huang, Sharon X. and Guo, Hengkai}, title = {Towards In-the-wild 3D Plane Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27027-27037} }
Memories of Forgotten Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rusanovsky_2025_CVPR, author = {Rusanovsky, Matan and Malnick, Shimon and Jevnisek, Amir and Fried, Ohad and Avidan, Shai}, title = {Memories of Forgotten Concepts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2966-2975} }
Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bohao and Wang, Xuejiao and Wang, Changbo and He, Gaoqi}, title = {Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10701-10711} }
PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poesina_2025_CVPR, author = {Poesina, Eduard and Costache, Adriana Valentina and Chifu, Adrian-Gabriel and Mothe, Josiane and Ionescu, Radu Tudor}, title = {PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28651-28661} }
CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Antony_2025_CVPR, author = {Antony, Mariamma and Porana, Rajiv and Lathiya, Sahil M and Kakileti, Siva Teja and Bhattacharyya, Chiranjib}, title = {CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25887-25896} }
PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wei and Chen, Pin-Yu and Liu, Sijia and Wang, Ren}, title = {PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10255-10264} }
Degradation-Aware Feature Perturbation for All-in-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Xiangpeng and Liao, Xiangyu and Liu, Xiao and Li, Meng and Ren, Chao}, title = {Degradation-Aware Feature Perturbation for All-in-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28165-28175} }
ACL: Activating Capability of Linear Attention for Image Restoration-
[pdf]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yubin and Meng, Yuan and Ji, Jiayi and Sun, Xiaoshuai}, title = {ACL: Activating Capability of Linear Attention for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17913-17923} }
GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rajagopalan_2025_CVPR, author = {Rajagopalan, Sudarshan and Nair, Nithin Gopalakrishnan and Paranjape, Jay N. and Patel, Vishal M.}, title = {GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28144-28154} }
Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Wenke and Feng, Ruoxuan and Wang, Dong and Hu, Di}, title = {Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6981-6990} }
The Power of Context: How Multimodality Improves Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Kangfu and Talebi, Hossein and Ardakani, Mojtaba and Patel, Vishal M. and Milanfar, Peyman and Delbracio, Mauricio}, title = {The Power of Context: How Multimodality Improves Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23141-23152} }
MARBLE: Material Recomposition and Blending in CLIP-Space-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Ta Ying and Sharma, Prafull and Boss, Mark and Jampani, Varun}, title = {MARBLE: Material Recomposition and Blending in CLIP-Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13061-13071} }
Multirate Neural Image Compression with Adaptive Lattice Vector Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Hao and Wu, Xiaolin and Zhang, Xi}, title = {Multirate Neural Image Compression with Adaptive Lattice Vector Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7633-7642} }
EventFly: Event Camera Perception from Ground to the Sky-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Lingdong and Lu, Dongyue and Xu, Xiang and Ng, Lai Xing and Ooi, Wei Tsang and Cottereau, Benoit R.}, title = {EventFly: Event Camera Perception from Ground to the Sky}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1472-1484} }
Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zhaohu and Liu, Lihao and Yang, Yijun and Wang, Hongqiu and Ye, Tian and Chen, Sixiang and Li, Wenxue and Liu, Guang and Zhu, Lei}, title = {Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25476-25486} }
CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiaqi and Wang, Yiran and Zheng, Jinghong and Zhang, Junrui and Shen, Liao and Liu, Tianqi and Cao, Zhiguo}, title = {CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7222-7232} }
Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Wonbong and Weinzaepfel, Philippe and Leroy, Vincent and Agapito, Lourdes and Revaud, Jerome}, title = {Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1071-1081} }
Efficient Visual State Space Model for Image Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Lingshun and Dong, Jiangxin and Tang, Jinhui and Yang, Ming-Hsuan and Pan, Jinshan}, title = {Efficient Visual State Space Model for Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12710-12719} }
4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wanhua and Zhou, Renping and Zhou, Jiawei and Song, Yingwei and Herter, Johannes and Qin, Minghan and Huang, Gao and Pfister, Hanspeter}, title = {4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22001-22011} }
MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xinqi and Zhou, Li and Zhou, Zikun and Chen, Jianqiu and He, Zhenyu}, title = {MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8731-8741} }
Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuillecard_2025_CVPR, author = {Vuillecard, Pierre and Odobez, Jean-Marc}, title = {Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13508-13518} }
Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Zhiwei and Nan, Yuesong and Zhao, Huixi and Liu, Gengdai}, title = {Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12912-12922} }
Detecting Out-of-Distribution Through the Lens of Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Litian and Qin, Yao}, title = {Detecting Out-of-Distribution Through the Lens of Neural Collapse}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15424-15433} }
Adaptive Parameter Selection for Tuning Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yi and Deng, Yi-Xuan and Guo, Meng-Hao and Hu, Shi-Min}, title = {Adaptive Parameter Selection for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4280-4290} }
MotionMap: Representing Multimodality in Human Pose Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hosseininejad_2025_CVPR, author = {Hosseininejad, Reyhaneh and Shukla, Megh and Saadatnejad, Saeed and Salzmann, Mathieu and Alahi, Alexandre}, title = {MotionMap: Representing Multimodality in Human Pose Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22680-22689} }
Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework-
[pdf]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hanrui and Qi, Niuniu and Ren, Mengxin and Liu, Banglong and Shi, Shuming and Yang, Zhengfeng}, title = {Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10275-10284} }
Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yue and Fan, Ningjing and Skorokhodov, Ivan and Voynov, Oleg and Ignatyev, Savva and Burnaev, Evgeny and Wonka, Peter and Wang, Yiqun}, title = {Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21317-21327} }
GaussianSpa: An "Optimizing-Sparsifying" Simplification Framework for Compact and High-Quality 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yangming and Jia, Wenqi and Niu, Wei and Yin, Miao}, title = {GaussianSpa: An ``Optimizing-Sparsifying'' Simplification Framework for Compact and High-Quality 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26673-26682} }
Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jiang and Li, Rui and Zhu, Yu and Guo, Rong and Sun, Jinqiu and Zhang, Yanning}, title = {Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11307-11316} }
VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kushwaha_2025_CVPR, author = {Kushwaha, Saksham Singh and Tian, Yapeng}, title = {VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13529-13539} }
Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression-
[pdf]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Zhenqi and Liu, Ting and Zhang, Yanning}, title = {Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11156-11166} }
CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Ziyu and Cheng, Xuxin and Huang, Zhiqi and Li, Lei}, title = {CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19143-19153} }
Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Wenhuan and Ji, Yi and Zhu, Guiqian and Ying, Li and Liu, Chunping}, title = {Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29448-29457} }
VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lei and Wei, Yuancheng and Xie, Zhihui and Yang, Xuqing and Song, Yifan and Wang, Peiyi and An, Chenxin and Liu, Tianyu and Li, Sujian and Lin, Bill Yuchen and Kong, Lingpeng and Liu, Qi}, title = {VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24657-24668} }
ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Yun and Fermoselle, Leonor and Ta, Duy and Bucher, Bernadette and Carlone, Luca and Wang, Jiuguang}, title = {ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29458-29468} }
Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ziyuan and Chen, Yingyu and Wang, Zhiwen and Shan, Hongming and Chen, Yang and Zhang, Yi}, title = {Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5154-5163} }
Exploiting Deblurring Networks for Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Haeyun and Yang, Heemin and Han, Janghyeok and Cho, Sunghyun}, title = {Exploiting Deblurring Networks for Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6012-6021} }
Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Yifan and Huang, Junjie and Wang, Xiaofeng and Ye, Yun and Liang, Zhujin and Shan, Yi and Du, Dalong and Wang, Xingang}, title = {Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6802-6811} }
SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zixuan and Boss, Mark and Vasishta, Aaryaman and Rehg, James M. and Jampani, Varun}, title = {SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16860-16870} }
Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yan and Zeng, Zequn and Zhang, Hao and Ding, Yucheng and Wang, Yi and Wang, Zhengjue and Chen, Bo and Liu, Hongwei}, title = {Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30199-30209} }
Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Xiaoying and Saha, Avinab and He, Junfeng and Hao, Susan and Vicol, Paul and Ryu, Moonkyung and Li, Gang and Singla, Sahil and Young, Sarah and Li, Yinxiao and Yang, Feng and Ramachandran, Deepak}, title = {Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18486-18496} }
RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Mingfei and Ma, Liang and Zhumakhanova, Kamila and Radionova, Ekaterina and Zhang, Jingyi and Chang, Xiaojun and Liang, Xiaodan and Laptev, Ivan}, title = {RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27586-27596} }
PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Nisar_2025_CVPR, author = {Nisar, Barza and Waslander, Steven L.}, title = {PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6670-6679} }
Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2025_CVPR, author = {Ko, Hanbin and Park, Chang-Min}, title = {Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25897-25906} }
SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jierun and Hu, Dongting and Huang, Xijie and Coskun, Huseyin and Sahni, Arpit and Gupta, Aarush and Goyal, Anujraaj and Lahiri, Dishani and Singh, Rajesh and Idelbayev, Yerlan and Cao, Junli and Li, Yanyu and Cheng, Kwang-Ting and Chan, S.-H. Gary and Gong, Mingming and Tulyakov, Sergey and Kag, Anil and Xu, Yanwu and Ren, Jian}, title = {SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7997-8008} }
Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Yucong and Gu, Shilin and Fan, Ruidong and Xu, Chao and Hou, Chenping}, title = {Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15392-15401} }
A Physics-Informed Blur Learning Framework for Imaging Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liqun and Li, Yuxuan and Dai, Jun and Gu, Jinwei and Xue, Tianfan}, title = {A Physics-Informed Blur Learning Framework for Imaging Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10913-10922} }
A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zheng and Yin, Guanchun and Zhang, Bo and Liu, Wu and Zhou, Xiuzhuang and Wang, Wendong}, title = {A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25940-25949} }
Community Forensics: Using Thousands of Generators to Train Fake Image Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jeongsoo and Owens, Andrew}, title = {Community Forensics: Using Thousands of Generators to Train Fake Image Detectors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8245-8257} }
ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zikang and Zhou, Hengjian and Hu, Haibo and Wen, Zihao and Wang, Jianping and Li, Yung-Hui and Huang, Yu-Kai}, title = {ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1612-1621} }
Quaffure: Real-Time Quasi-Static Neural Hair Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stuyck_2025_CVPR, author = {Stuyck, Tuur and Lin, Gene Wei-Chin and Larionov, Egor and Chen, Hsiao-yu and Bozic, Aljaz and Sarafianos, Nikolaos and Roble, Doug}, title = {Quaffure: Real-Time Quasi-Static Neural Hair Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {239-249} }
Towards Practical Real-Time Neural Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Zhaoyang and Li, Bin and Li, Jiahao and Xie, Wenxuan and Qi, Linfeng and Li, Houqiang and Lu, Yan}, title = {Towards Practical Real-Time Neural Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12543-12552} }
DepthSplat: Connecting Gaussian Splatting and Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Haofei and Peng, Songyou and Wang, Fangjinhua and Blum, Hermann and Barath, Daniel and Geiger, Andreas and Pollefeys, Marc}, title = {DepthSplat: Connecting Gaussian Splatting and Depth}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16453-16463} }
LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Xiaoyan and Groh, Konrad and Karaoglu, Sezer and Gevers, Theo and Bhattad, Anand}, title = {LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {442-452} }
FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Haokun and Li, Hang and Zhang, Yao and Bi, Jinhe and Zhang, Gengyuan and Zhang, Yueqi and Torr, Philip and Gu, Jindong and Krompass, Denis and Tresp, Volker}, title = {FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30440-30450} }
DiC: Rethinking Conv3x3 Designs in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Yuchuan and Han, Jing and Wang, Chengcheng and Liang, Yuchen and Xu, Chao and Chen, Hanting}, title = {DiC: Rethinking Conv3x3 Designs in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2469-2478} }
Dynamic Camera Poses and Where to Find Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rockwell_2025_CVPR, author = {Rockwell, Chris and Tung, Joseph and Lin, Tsung-Yi and Liu, Ming-Yu and Fouhey, David F. and Lin, Chen-Hsuan}, title = {Dynamic Camera Poses and Where to Find Them}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12444-12455} }
MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Jiahui and Weng, Yijia and Harley, Adam W. and Guibas, Leonidas and Daniilidis, Kostas}, title = {MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6165-6177} }
GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Weihang and XU, Hongli and Huang, Junwen and Jung, Hyunjun and Yu, Peter KT and Navab, Nassir and Busam, Benjamin}, title = {GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27154-27165} }
OmniGen: Unified Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Shitao and Wang, Yueze and Zhou, Junjie and Yuan, Huaying and Xing, Xingrun and Yan, Ruiran and Li, Chaofan and Wang, Shuting and Huang, Tiejun and Liu, Zheng}, title = {OmniGen: Unified Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13294-13304} }
QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meli_2025_CVPR, author = {Meli, Natacha Kuete and Golyanik, Vladislav and Benkner, Marcel Seelbach and Moeller, Michael}, title = {QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11395-11405} }
Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Kaiwei and Zhu, Dandan and Min, Xiongkuo and Zhai, Guangtao}, title = {Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16219-16228} }
Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Hao and Xiao, Erjia and Yang, Jiayan and Cao, Jiahang and Zhang, Qiang and Zhang, Jize and Xu, Kaidi and Gu, Jindong and Xu, Renjing}, title = {Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2997-3007} }
SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Leigang and Li, Haochuan and Wang, Wenjie and Liu, Xiang and Li, Juncheng and Nie, Liqiang and Chua, Tat-Seng}, title = {SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18497-18508} }
Calibrated Multi-Preference Optimization for Aligning Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Kyungmin and Li, Xiahong and Wang, Qifei and He, Junfeng and Ke, Junjie and Yang, Ming-Hsuan and Essa, Irfan and Shin, Jinwoo and Yang, Feng and Li, Yinxiao}, title = {Calibrated Multi-Preference Optimization for Aligning Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18465-18475} }
Learning from Neighbors: Category Extrapolation for Long-Tail Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shizhen and Wen, Xin and Liu, Jiahui and Ma, Chuofan and Yuan, Chunfeng and Qi, Xiaojuan}, title = {Learning from Neighbors: Category Extrapolation for Long-Tail Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30483-30492} }
Material Anything: Generating Materials for Any 3D Object via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Xin and Wang, Tengfei and Liu, Ziwei and Wang, Qing}, title = {Material Anything: Generating Materials for Any 3D Object via Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26556-26565} }
TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Liang and Yang, Zeshi and Dou, Zhiyang and Wang, Wenjia and Huang, Buzhen and Dai, Bo and Komura, Taku and Wang, Jingbo}, title = {TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5379-5391} }
ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maruani_2025_CVPR, author = {Maruani, Nissim and Yifan, Wang and Fisher, Matthew and Alliez, Pierre and Desbrun, Mathieu}, title = {ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {605-617} }
ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Haoyuan and Li, Xiaoou and Lv, Jiaming and Cheng, Xianjun and Wang, Qilong and Li, Peihua}, title = {ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30020-30031} }
Continuous Locomotive Crowd Behavior Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2025_CVPR, author = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon}, title = {Continuous Locomotive Crowd Behavior Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22416-22431} }
Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Beier and Cui, Jiequan and Zhang, Hanwang and Zhang, Chi}, title = {Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25487-25496} }
Implicit Bias Injection Attacks against Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Huayang and Jin, Xiangye and Miao, Jiaxu and Wu, Yu}, title = {Implicit Bias Injection Attacks against Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28779-28789} }
ROICtrl: Boosting Instance Control for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuchao and Zhou, Yipin and Ye, Yunfan and Nie, Yixin and Yu, Licheng and Ma, Pingchuan and Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {ROICtrl: Boosting Instance Control for Visual Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23658-23667} }
FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Rong and Prada, Fabian and Wang, Ziyan and Jiang, Zhongshi and Yin, Chengxiang and Li, Junxuan and Saito, Shunsuke and Santesteban, Igor and Romero, Javier and Joshi, Rohan and Li, Hongdong and Saragih, Jason and Sheikh, Yaser}, title = {FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {281-291} }
ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhenyang and Wang, Yikai and Zheng, Sixiao and Pan, Tongying and Liang, Longfei and Fu, Yanwei and Xue, Xiangyang}, title = {ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3718-3727} }
Cropper: Vision-Language Model for Image Cropping through In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Seung Hyun and Jiang, Jijun and Xu, Yiran and Li, Zhuofang and Ke, Junjie and Li, Yinxiao and He, Junfeng and Hickson, Steven and Datsenko, Katie and Kim, Sangpil and Yang, Ming-Hsuan and Essa, Irfan and Yang, Feng}, title = {Cropper: Vision-Language Model for Image Cropping through In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30010-30019} }
Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Nicole and Manicke, Caleb and Sahu, Ronak and Ding, Caiwen and Lao, Yingjie}, title = {Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16388-16397} }
WonderWorld: Interactive 3D Scene Generation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hong-Xing and Duan, Haoyi and Herrmann, Charles and Freeman, William T. and Wu, Jiajun}, title = {WonderWorld: Interactive 3D Scene Generation from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5916-5926} }
A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Jiangbei and Li, Yanggeng and Hou, Fei and Hou, Junhui and Zhang, Zhebin and Wang, Shengfa and Lei, Na and He, Ying}, title = {A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1297-1307} }
DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xingjian and Zhao, Qiming and Bisht, Neelesh and Uddin, Mostofa Rafid and Kim, Jin Yu and Zhang, Bryan and Xu, Min}, title = {DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10327-10337} }
PolarNeXt: Rethink Instance Segmentation with Polar Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Jiacheng and Zhou, Xinghong and Wu, Yiqiang and Zhu, Bin and Lu, Jiaxuan and Qin, Yu and Li, Xiaomao}, title = {PolarNeXt: Rethink Instance Segmentation with Polar Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19315-19324} }
ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Shunlin and Wang, Jingbo and Lu, Zeyu and Chen, Ling-Hao and Dai, Wenxun and Dong, Junting and Dou, Zhiyang and Dai, Bo and Zhang, Ruimao}, title = {ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27872-27882} }
From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barquero_2025_CVPR, author = {Barquero, German and Bertsch, Nadine and Marramreddy, Manojkumar and Chac\'on, Carlos and Arcadu, Filippo and Rigual, Ferran and He, Nicky Sijia and Palmero, Cristina and Escalera, Sergio and Ye, Yuting and Kips, Robin}, title = {From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1850-1860} }
Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, You and Ma, Fan and Yang, Yi}, title = {Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3984-3993} }
EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kai and Gou, Yunhao and Huang, Runhui and Liu, Zhili and Tan, Daxin and Xu, Jing and Wang, Chunwei and Zhu, Yi and Zeng, Yihan and Yang, Kuo and Wang, Dingdong and Xiang, Kun and Li, Haoyuan and Bai, Haoli and Han, Jianhua and Li, Xiaohui and Jin, Weike and Xie, Nian and Zhang, Yu and Kwok, James T. and Zhao, Hengshuang and Liang, Xiaodan and Yeung, Dit-Yan and Chen, Xiao and Li, Zhenguo and Zhang, Wei and Liu, Qun and Hong, Lanqing and Hou, Lu and Xu, Hang}, title = {EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5455-5466} }
SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chongkai and Liu, Ting and Li, Anqi and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin}, title = {SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19356-19365} }
DarkIR: Robust Low-Light Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feijoo_2025_CVPR, author = {Feijoo, Daniel and Benito, Juan C. and Garcia, Alvaro and Conde, Marcos V.}, title = {DarkIR: Robust Low-Light Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10879-10889} }
R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Ziyi and Li, Hanxuan and Fu, Bin and Xiong, Chuyan and Wang, Ruiping and Chen, Xilin}, title = {R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19456-19466} }
ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cendra_2025_CVPR, author = {Cendra, Fernando Julio and Han, Kai}, title = {ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23734-23743} }
ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Junchao and Deng, Ruining and Yao, Tianyuan and Xiong, Juming and Qu, Chongyu and Guo, Junlin and Lu, Siqi and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Yang, Haichun and Huo, Yuankai}, title = {ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30829-30838} }
Reversing Flow for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Haina and Luo, Wenyang and Wang, Libin and Zheng, Dandan and Chen, Jingdong and Yang, Ming and Li, Bing and Hu, Weiming}, title = {Reversing Flow for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7545-7558} }
Shadow Generation Using Diffusion Model with Geometry Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Haonan and Liu, Qingyang and Tao, Xinhao and Niu, Li and Zhai, Guangtao}, title = {Shadow Generation Using Diffusion Model with Geometry Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7603-7612} }
Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2025_CVPR, author = {Zong, Chen-Chen and Huang, Sheng-Jun}, title = {Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10153-10162} }
Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Phuc and Luu, Minh and Tran, Anh and Pham, Cuong and Nguyen, Khoi}, title = {Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3636-3645} }
FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Yufan and Jiang, Zicong and Zhang, Tong and Forchhammer, S{\o}ren and S\"usstrunk, Sabine}, title = {FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2651-2660} }
MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jian and Yin, Dacheng and Zhou, Yizhou and Rao, Fengyun and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7974-7985} }
ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object-
[pdf]
[supp]
[bibtex]@InProceedings{Shan_2025_CVPR, author = {Shan, Zhe and Liu, Yang and Zhou, Lei and Yan, Cheng and Wang, Heng and Xie, Xia}, title = {ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3625-3635} }
MultiMorph: On-demand Atlas Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abulnaga_2025_CVPR, author = {Abulnaga, S. Mazdak and Hoopes, Andrew and Dey, Neel and Hoffmann, Malte and Fischl, Bruce and Guttag, John and Dalca, Adrian}, title = {MultiMorph: On-demand Atlas Construction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30906-30917} }
MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism-
[pdf]
[supp]
[bibtex]@InProceedings{Nan_2025_CVPR, author = {Nan, Zhixiong and Li, Xianghong and Dai, Jifeng and Xiang, Tao}, title = {MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4703-4712} }
From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jinhong and Wu, Cheng-En and Li, Huanran and Zhang, Jifan and Hu, Yu Hen and Morgado, Pedro}, title = {From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20028-20038} }
Synthetic Visual Genome-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jae Sung and Ma, Zixian and Li, Linjie and Zheng, Chenhao and Hsieh, Cheng-Yu and Lu, Ximing and Chandu, Khyathi and Kong, Quan and Kobori, Norimasa and Farhadi, Ali and Choi, Yejin and Krishna, Ranjay}, title = {Synthetic Visual Genome}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9073-9086} }
Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Hyunsoo and Kim, Donghyun and Kim, Suhyun}, title = {Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18250-18259} }
Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Suo_2025_CVPR, author = {Suo, Wei and Zhang, Lijun and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning}, title = {Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29904-29914} }
MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Jun and Liu, Ting and Wu, Yihang and Qu, Xiaochao and Liu, Luoqi and Hu, Xiaolin}, title = {MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18325-18334} }
Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2025_CVPR, author = {Yoon, Dokyoon and Song, Youngsook and Park, Woomyoung}, title = {Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4200-4208} }
Seeing the Abstract: Translating the Abstract Language for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Talon_2025_CVPR, author = {Talon, Davide and Girella, Federico and Liu, Ziyue and Cristani, Marco and Wang, Yiming}, title = {Seeing the Abstract: Translating the Abstract Language for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9253-9262} }
Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yufei and Liu, Xiaode and Chen, Yuanpei and Peng, Weihang and Zhang, Yuhan and Ma, Zhe}, title = {Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24398-24408} }
Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, He and Kong, Quyu and Xu, Kechun and Xia, Xunlong and Deng, Bing and Ye, Jieping and Xiong, Rong and Wang, Yue}, title = {Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17337-17346} }
MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Ruijie and Chen, Yixin and Ni, Junfeng and Jia, Baoxiong and Liu, Yu and Wan, Diwen and Zeng, Gang and Huang, Siyuan}, title = {MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26767-26778} }
One-Step Event-Driven High-Speed Autofocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_CVPR, author = {Bao, Yuhan and Gao, Shaohua and Li, Wenyong and Wang, Kaiwei}, title = {One-Step Event-Driven High-Speed Autofocus}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6222-6230} }
Symbolic Representation for Any-to-Any Generative Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jiaqi and Zhu, Xiaoye and Wang, Yue and Liu, Tianyang and Chen, Xinhui and Chen, Ying and Leong, Chak Tou and Ke, Yifei and Liu, Joseph and Yuan, Yiwen and McAuley, Julian and Li, Li-jia}, title = {Symbolic Representation for Any-to-Any Generative Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27816-27826} }
Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Haitong and Gao, Kuofeng and Bai, Yang and Li, Jinmin and Shan, Jinxiao and Dai, Tao and Xia, Shu-Tao}, title = {Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24056-24065} }
PanDA: Towards Panoramic Depth Anything with Unlabeled Panoramas and Mobius Spatial Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Zidong and Zhu, Jinjing and Zhang, Weiming and Ai, Hao and Bai, Haotian and Zhao, Hengshuang and Wang, Lin},
  title     = {{PanDA}: Towards Panoramic Depth Anything with Unlabeled Panoramas and {Mobius} Spatial Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {982--992}
}
Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xuanchen and Wang, Jianyu and Cheng, Yuhao and Zeng, Yikun and Ren, Xingyu and Zhu, Wenhan and Zhao, Weiming and Yan, Yichao},
  title     = {Towards High-fidelity {3D} Talking Avatar with Personalized Dynamic Texture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {204--214}
}
Scene Splatter: Momentum 3D Scene Generation from Single Image with Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shengjun and Li, Jinzhao and Fei, Xin and Liu, Hao and Duan, Yueqi},
  title     = {Scene Splatter: Momentum {3D} Scene Generation from Single Image with Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6089--6098}
}
JiSAM: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Runjian and Shao, Wenqi and Zhang, Bo and Shi, Shaoshuai and Jiang, Li and Luo, Ping},
  title     = {{JiSAM}: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6792--6801}
}
OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Gehui and Chen, Bin and Zhao, Chen and Zhang, Lei and Zhang, Jian},
  title     = {{OSMamba}: Omnidirectional Spectral {Mamba} with Dual-Domain Prior Generator for Exposure Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7480--7490}
}
Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Pu and Zhou, Feng and Yang, Lu and Huang, Tianrui and Song, Qing},
  title     = {Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18358--18368}
}
MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ziyang and Yu, Yang and Chen, Yucheng and Yang, Xulei and Yeo, Si Yong},
  title     = {{MedUnifier}: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29744--29755}
}
RandAR: Decoder-only Autoregressive Visual Generation in Random Orders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Ziqi and Zhang, Tianyuan and Luan, Fujun and Man, Yunze and Tan, Hao and Zhang, Kai and Freeman, William T. and Wang, Yu-Xiong},
  title     = {{RandAR}: Decoder-only Autoregressive Visual Generation in Random Orders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {45--55}
}
Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, You and Zhang, Zhipeng and Li, Xinyang and Qu, Yansong and Lin, Yu and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16346--16355}
}
ArticulatedGS: Self-supervised Digital Twin Modeling of Articulated Objects using 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Junfu and Xin, Yu and Liu, Gaoyi and Xu, Kai and Liu, Ligang and Hu, Ruizhen},
  title     = {{ArticulatedGS}: Self-supervised Digital Twin Modeling of Articulated Objects using {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27144--27153}
}
NoiseCtrl: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Longquan and Wang, He and Tang, Jinhui},
  title     = {{NoiseCtrl}: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18093--18102}
}
Leveraging 3D Geometric Priors in 2D Rotation Symmetry Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Ahyun and Cho, Minsu},
  title     = {Leveraging {3D} Geometric Priors in {2D} Rotation Symmetry Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22109--22118}
}
KMD: Koopman Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Tianyi and Jiang, Haochuan and Huang, Kaizhu},
  title     = {{KMD}: {Koopman} Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15663--15671}
}
Vid2Sim: Realistic and Interactive Simulation from Video for Urban Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Ziyang and Liu, Zhizheng and Peng, Zhenghao and Wu, Wayne and Zhou, Bolei},
  title     = {{Vid2Sim}: Realistic and Interactive Simulation from Video for Urban Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1581--1591}
}
DORNet: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhengxue and Yan, Zhiqiang and Pan, Jinshan and Gao, Guangwei and Zhang, Kai and Yang, Jian},
  title     = {{DORNet}: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15813--15822}
}
Fractal Calibration for Long-tailed Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alexandridis_2025_CVPR,
  author    = {Alexandridis, Konstantinos Panagiotis and Elezi, Ismail and Deng, Jiankang and Nguyen, Anh and Luo, Shan},
  title     = {Fractal Calibration for Long-tailed Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15139--15150}
}
M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Qingzheng and Cao, Ru and Shen, Xin and Du, Heming and Wang, Sen and Yu, Xin},
  title     = {{M3GYM}: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12289--12300}
}
Noise Calibration and Spatial-Frequency Interactive Network for STEM Image Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Zhang, Tao and Fu, Ying},
  title     = {Noise Calibration and Spatial-Frequency Interactive Network for {STEM} Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21287--21296}
}
Type-R: Automatically Retouching Typos for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shimoda_2025_CVPR,
  author    = {Shimoda, Wataru and Inoue, Naoto and Haraguchi, Daichi and Mitani, Hayato and Uchida, Seiichi and Yamaguchi, Kota},
  title     = {{Type-R}: Automatically Retouching Typos for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2745--2754}
}
Video-3D LLM: Learning Position-Aware Video Representation for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Duo and Huang, Shijia and Wang, Liwei},
  title     = {{Video-3D} {LLM}: Learning Position-Aware Video Representation for {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8995--9006}
}
FedSPA: Generalizable Federated Graph Learning under Homophily Heterogeneity-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Zhang, Guibin and Yang, Carl and Ye, Mang},
  title     = {{FedSPA}: Generalizable Federated Graph Learning under Homophily Heterogeneity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15464--15475}
}
Homogeneous Dynamics Space for Heterogeneous Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinpeng and Liang, Junxuan and Zhang, Chenshuo and Cai, Zixuan and Lu, Cewu and Li, Yong-Lu},
  title     = {Homogeneous Dynamics Space for Heterogeneous Humans},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27782--27793}
}
TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Yoon Gyo and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew Beng Jin and Camps, Octavia},
  title     = {{TailedCore}: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25539--25548}
}
GazeGene: Large-scale Synthetic Gaze Dataset with 3D Eyeball Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Yiwei and Wang, Zhiming and Lu, Feng},
  title     = {{GazeGene}: Large-scale Synthetic Gaze Dataset with {3D} Eyeball Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18749--18759}
}
VideoHandles: Editing 3D Object Compositions in Videos Using Video Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koo_2025_CVPR,
  author    = {Koo, Juil and Guerrero, Paul and Huang, Chun-Hao P. and Ceylan, Duygu and Sung, Minhyuk},
  title     = {{VideoHandles}: Editing {3D} Object Compositions in Videos Using Video Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17692--17701}
}
Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Siwei and Fei, Ben and Yang, Weidong and Ling, Fenghua and Chen, Hao and Liu, Zili and Chen, Kun and Fan, Hang and Ouyang, Wanli and Bai, Lei},
  title     = {Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28071--28080}
}
Reconstructing People, Places, and Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2025_CVPR,
  author    = {M{\"u}ller, Lea and Choi, Hongsuk and Zhang, Anthony and Yi, Brent and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Reconstructing People, Places, and Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21948--21958}
}
InPO: Inversion Preference Optimization with Reparametrized DDIM for Efficient Diffusion Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yunhong and Wang, Qichao and Cao, Hengyuan and Wang, Xierui and Xu, Xiaoyin and Zhang, Min},
  title     = {{InPO}: Inversion Preference Optimization with Reparametrized {DDIM} for Efficient Diffusion Model Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28629--28639}
}
GaussTR: Foundation Model-Aligned Gaussian Transformer for Self-Supervised 3D Spatial Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haoyi and Liu, Liu and Cheng, Tianheng and Wang, Xinjie and Lin, Tianwei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang},
  title     = {{GaussTR}: Foundation Model-Aligned {Gaussian} Transformer for Self-Supervised {3D} Spatial Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11960--11970}
}
Single Domain Generalization for Few-Shot Counting via Universal Representation Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xianing and Huo, Si and Jiang, Borui and Hu, Hailin and Chen, Xinghao},
  title     = {Single Domain Generalization for Few-Shot Counting via Universal Representation Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4639--4649}
}
Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Xueyi and Tsutsui, Satoshi and Zhang, Yayun and Wen, Bihan},
  title     = {Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4343--4352}
}
Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teney_2025_CVPR,
  author    = {Teney, Damien and Jiang, Liangze and Gogianu, Florin and Abbasnejad, Ehsan},
  title     = {Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {79--90}
}
A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jie and Chen, Haorui and Ren, Jiaxuan and Peng, Siran and Deng, Liangjian},
  title     = {A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7447--7456}
}
RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Debnath_2025_CVPR,
  author    = {Debnath, Soumyaratna and Tiwari, Ashish and Sadekar, Kaustubh and Raman, Shanmuganathan},
  title     = {{RASP}: Revisiting {3D} Anamorphic Art for Shadow-Guided Packing of Irregular Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5849--5858}
}
Identifying and Mitigating Spurious Correlation in Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chai_2025_CVPR,
  author    = {Chai, Junyi and Lu, Shenyu and Wang, Xiaoqian},
  title     = {Identifying and Mitigating Spurious Correlation in Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25698--25707}
}
Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baumann_2025_CVPR,
  author    = {Baumann, Stefan Andreas and Krause, Felix and Neumayr, Michael and Stracke, Nick and Sevi, Melvin and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {Continuous, Subject-Specific Attribute Control in {T2I} Models by Identifying Semantic Directions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13231--13241}
}
Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jeong Ryong and Shin, Yejee and Son, Geonhui and Hwang, Dosik},
  title     = {Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4050--4059}
}
MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shanglin and Lv, Jianming and Kang, Jingdan and Zhang, Huaidong and Liang, Zequan and He, Shengfeng},
  title     = {{MODfinity}: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5092--5101}
}
Towards Universal Soccer Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2025_CVPR,
  author    = {Rao, Jiayuan and Wu, Haoning and Jiang, Hao and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {Towards Universal Soccer Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8384--8394}
}
SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Renz_2025_CVPR,
  author    = {Renz, Katrin and Chen, Long and Arani, Elahe and Sinavski, Oleg},
  title     = {{SimLingo}: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11993--12003}
}
Improved Video VAE for Latent Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Pingyu and Zhu, Kai and Liu, Yu and Zhao, Liming and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {Improved Video {VAE} for Latent Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18124--18133}
}
Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosal_2025_CVPR,
  author    = {Ghosal, Soumya Suvra and Chakraborty, Souradip and Singh, Vaibhav and Guan, Tianrui and Wang, Mengdi and Beirami, Ahmad and Huang, Furong and Velasquez, Alvaro and Manocha, Dinesh and Bedi, Amrit Singh},
  title     = {Immune: Improving Safety Against Jailbreaks in Multi-modal {LLMs} via Inference-Time Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25038--25049}
}
Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Mingjun and Sun, Long and Dong, Jiangxin and Pan, Jinshan},
  title     = {Efficient Video Super-Resolution for Real-time Rendering with Decoupled {G-buffer} Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11328--11337}
}
CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jungsoo and Das, Debasmit and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih},
  title     = {{CustomKD}: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25176--25186}
}
Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Chen and Liu, Daochang and Shah, Mubarak and Xu, Chang},
  title     = {Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8182--8191}
}
Learned Image Compression with Dictionary-based Entropy Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jingbo and Zhang, Leheng and Zhou, Xingyu and Li, Mu and Li, Wen and Gu, Shuhang},
  title     = {Learned Image Compression with Dictionary-based Entropy Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12850--12859}
}
PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Mingzhi and Cao, Xu and Wang, Xiangyi and Guo, Heng and Ma, Zhanyu},
  title     = {{PMNI}: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26834--26843}
}
NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Lingen and Zhang, Zhaoyang and Li, Yaowei and Xu, Jiale and Hu, Wenbo and Li, Xiaoyu and Cheng, Weihao and Gu, Jinwei and Xue, Tianfan and Shan, Ying},
  title     = {{NVComposer}: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {777--787}
}
LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jiamin and Liu, Kenkun and Gao, Han and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei},
  title     = {{LeanGaussian}: Breaking Pixel or Point Cloud Correspondence in Modeling {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26641--26651}
}
Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wei-Jin and Li, Yuan-Ming and Xia, Zhi-Wei and Tang, Yu-Ming and Lin, Kun-Yu and Hu, Jian-Fang and Zheng, Wei-Shi},
  title     = {Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27794--27804}
}
Efficient Personalization of Quantized Diffusion Model without Backpropagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Hoigi and Jeong, Wongi and Lee, Kyungryeol and Chun, Se Young},
  title     = {Efficient Personalization of Quantized Diffusion Model without Backpropagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7717--7727}
}
Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yifan and Xiao, Zeqi and Yang, Shuai and Pan, Xingang},
  title     = {Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {34--44}
}
A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Akshay_2025_CVPR,
  author    = {Akshay, Shilhora and Narasimhan, Niveditha Lakshmi and George, Jacob and Balasubramanian, Vineeth N},
  title     = {A Unified Latent {Schrodinger} Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25528--25538}
}
KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Yunpeng and Yuan, Kun and Xie, Qizhi and Sun, Ming and Zhou, Chao and Wang, Jian},
  title     = {{KVQ}: Boosting Video Quality Assessment via Saliency-guided Local Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2150--2160}
}
MambaVision: A Hybrid Mamba-Transformer Vision Backbone-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hatamizadeh_2025_CVPR,
  author    = {Hatamizadeh, Ali and Kautz, Jan},
  title     = {{MambaVision}: A Hybrid {Mamba-Transformer} Vision Backbone},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25261--25270}
}
Learning Flow Fields in Attention for Controllable Person Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zijian and Liu, Shikun and Han, Xiao and Liu, Haozhe and Ng, Kam Woh and Xie, Tian and Cong, Yuren and Li, Hang and Xu, Mengmeng and Perez-Rua, Juan-Manuel and Patel, Aditya and Xiang, Tao and Shi, Miaojing and He, Sen},
  title     = {Learning Flow Fields in Attention for Controllable Person Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2491--2501}
}
Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Songsong and Yang, Xi and Wang, Nannan},
  title     = {Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30241--30250}
}
Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Whalen_2025_CVPR,
  author    = {Whalen, Lexington and Du, Zhenbang and You, Haoran and Li, Chaojian and Li, Sixu and Lin, Yingyan},
  title     = {Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7675--7684}
}
FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jun and Li, Jiahao and Xu, Zunnan and Li, Hanhui and Cheng, Yiji and Hong, Fa-Ting and Lin, Qin and Lu, Qinglin and Liang, Xiaodan},
  title     = {{FireEdit}: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13093--13103}
}
Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features-
[pdf]
[bibtex]@InProceedings{Xiangli_2025_CVPR,
  author    = {Xiangli, Yuanbo and Cai, Ruojin and Chen, Hanyu and Byrne, Jeffrey and Snavely, Noah},
  title     = {{Doppelgangers++}: Improved Visual Disambiguation with Geometric {3D} Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27166--27175}
}
Learnable Infinite Taylor Gaussian for Dynamic View Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Bingbing and Li, Yanyan and Xie, Rui and Xu, Bo and Dong, Haoye and Yao, Junfeng and Lee, Gim Hee},
  title     = {Learnable Infinite {Taylor} {Gaussian} for Dynamic View Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26844--26854}
}
DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Zhilv and Lu, Xiao and Ding, Hong and Hu, Jingbo and Jiang, Zhi and Xiao, Chunxia},
  title     = {{DL2G}: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16061--16070}
}
DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Mur-Labadia_2025_CVPR,
  author    = {Mur-Labadia, Lorenzo and Guerrero, Josechu and Martinez-Cantin, Ruben},
  title     = {{DIV-FF}: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3470--3480}
}
SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongda and Wang, Longguang and Zhang, Ye and Yu, Ziru and Guo, Yulan},
  title     = {{SaMam}: Style-aware State Space Model for Arbitrary Image Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28468--28478}
}
MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Mengqiu and Chen, Kaixin and Guo, Heng and Huang, Yixiang and Wu, Ming and Shi, Zhenwei and Zhang, Chuang and Guo, Jun},
  title     = {{MFogHub}: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12637--12646}
}
The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]
@InProceedings{George_2025_CVPR,
  author    = {George, Naveen and Dasaraju, Karthik Nandan and Chittepu, Rutheesh Reddy and Mopuri, Konda Reddy},
  title     = {The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13393--13402}
}
Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration-
[pdf]
[supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yudong and Luo, Hao and Zhong, Zhiwei and Chen, Peilin and Zhang, Zhijiang and Wang, Shiqi},
  title     = {Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28039--28049}
}
Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images-
[pdf]
[supp]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Junning and Lu, Minglei and Wang, Fei and Guo, Yu and Ling, Yonggen},
  title     = {Leveraging Global Stereo Consistency for Category-Level Shape and {6D} Pose Estimation from Stereo Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16839--16849}
}
AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting-
[pdf]
[supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kenghong and Zhang, Baoquan and Yu, Demin and Feng, Wenzhi and Chen, Shidong and Gao, Feifan and Li, Xutao and Ye, Yunming},
  title     = {{AlphaPre}: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17841--17850}
}
EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yinan and Wang, Ziwei and Xu, Xiuwei and Zhou, Jie and Lu, Jiwen},
  title     = {{EfficientLLaVA}: Generalizable Auto-Pruning for Large Vision-language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9445--9454}
}
Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection-
[pdf]
[supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Houzhang and Wang, Xiaolin and Li, Zengyang and Wang, Lu and Li, Qingshan and Chang, Yi and Yan, Luxin},
  title     = {Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared {UAV} Target Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11898--11907}
}
Articulated Kinematics Distillation from Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xuan and Ma, Qianli and Lin, Tsung-Yi and Chen, Yongxin and Jiang, Chenfanfu and Liu, Ming-Yu and Xiang, Donglai},
  title     = {Articulated Kinematics Distillation from Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17571--17581}
}
ExpertAF: Expert Actionable Feedback from Video-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ashutosh_2025_CVPR,
  author    = {Ashutosh, Kumar and Nagarajan, Tushar and Pavlakos, Georgios and Kitani, Kris and Grauman, Kristen},
  title     = {{ExpertAF}: Expert Actionable Feedback from Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13582--13594}
}
MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion-
[pdf]
[supp]
[bibtex]
@InProceedings{Pataki_2025_CVPR,
  author    = {Pataki, Zador and Sarlin, Paul-Edouard and Sch{\"o}nberger, Johannes L. and Pollefeys, Marc},
  title     = {{MP-SfM}: Monocular Surface Priors for Robust Structure-from-Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21891--21901}
}
OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yijie and Zhang, Jiazhao and Lan, Yuqing and Guo, Yulan and Dong, Dezun and Zhu, Chenyang and Xu, Kai},
  title     = {{OnlineAnySeg}: Online Zero-Shot {3D} Segmentation by Visual Foundation Model Guided {2D} Mask Merging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3676--3685}
}
Tora: Trajectory-oriented Diffusion Transformer for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhenghao and Liao, Junchao and Li, Menghao and Dai, ZuoZhuo and Qiu, Bingxue and Zhu, Siyu and Qin, Long and Wang, Weizhi},
  title     = {Tora: Trajectory-oriented Diffusion Transformer for Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2063--2073}
}
Volumetrically Consistent 3D Gaussian Rasterization-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Talegaonkar_2025_CVPR,
  author    = {Talegaonkar, Chinmay and Belhe, Yash and Ramamoorthi, Ravi and Antipa, Nicholas},
  title     = {Volumetrically Consistent {3D} {Gaussian} Rasterization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10953--10963}
}
Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Hua_2025_CVPR,
  author    = {Hua, Yu and Liu, Weiming and Xu, Gui and Hou, Yaqing and Ong, Yew-Soon and Zhang, Qiang},
  title     = {Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22724--22734}
}
Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Wynn_2025_CVPR,
  author    = {Wynn, Jamie and Qureshi, Zawar and Powierza, Jakub and Watson, Jamie and Sayed, Mohamed},
  title     = {Morpheus: Text-Driven {3D} {Gaussian} Splat Shape and Color Stylization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7825--7836}
}
CacheQuant: Comprehensively Accelerated Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xuewen and Li, Zhikai and Gu, Qingyi},
  title     = {{CacheQuant}: Comprehensively Accelerated Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23269--23280}
}
The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation-
[pdf]
[supp]
[bibtex]
@InProceedings{Nordstrom_2025_CVPR,
  author    = {Nordstr{\"o}m, Marcus and Maki, Atsuto and Hult, Henrik},
  title     = {The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-{Dice} in Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20820--20829}
}
Open-World Objectness Modeling Unifies Novel Object Detection-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shan and Ni, Yao and Du, Jinhao and Xue, Yuan and Torr, Philip and Koniusz, Piotr and van den Hengel, Anton},
  title     = {Open-World Objectness Modeling Unifies Novel Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30332--30342}
}
LLaVA-Critic: Learning to Evaluate Multimodal Models-
[pdf]
[supp]
[bibtex]
@InProceedings{Xiong_2025_CVPR,
  author    = {Xiong, Tianyi and Wang, Xiyao and Guo, Dong and Ye, Qinghao and Fan, Haoqi and Gu, Quanquan and Huang, Heng and Li, Chunyuan},
  title     = {{LLaVA-Critic}: Learning to Evaluate Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13618--13628}
}
VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge-
[pdf]
[supp]
[bibtex]
@InProceedings{Nath_2025_CVPR,
  author    = {Nath, Vishwesh and Li, Wenqi and Yang, Dong and Myronenko, Andriy and Zheng, Mingxin and Lu, Yao and Liu, Zhijian and Yin, Hongxu and Law, Yee Man and Tang, Yucheng and Guo, Pengfei and Zhao, Can and Xu, Ziyue and He, Yufan and Harmon, Stephanie and Simon, Benjamin and Heinrich, Greg and Aylward, Stephen and Edgar, Marc and Zephyr, Michael and Molchanov, Pavlo and Turkbey, Baris and Roth, Holger and Xu, Daguang},
  title     = {{VILA-M3}: Enhancing Vision-Language Models with Medical Expert Knowledge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14788--14798}
}
Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jingxi and Feng, Brandon Y. and Cai, Haoming and Wang, Tianfu and Burner, Levi and Yuan, Dehao and Fermuller, Cornelia and Metzler, Christopher A. and Aloimonos, Yiannis},
  title     = {Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12456--12466}
}
MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Shenghao and Lu, Yi and Huang, Jiayi and Zhao, Jiayi and Zhang, He and Yu, Tao and Shen, Qiu and Cao, Xun},
  title     = {{MotionPRO}: Exploring the Role of Pressure in Human {MoCap} and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27760--27770}
}
DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Mu and Li, Liulei and Wang, Wenguan and Yang, Yi},
  title     = {{DiffVsgg}: Diffusion-Driven Online Video Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29161--29172}
}
Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels-
[pdf]
[supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiyuan and Liu, Xinwang and Li, Chuankun and Wan, Xinhang and Tan, Hao and Zhang, Yi and Liang, Weixuan and Qu, Qian and Feng, Yu and Guan, Renxiang and Liang, Ke},
  title     = {Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20727--20736}
}
Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Boyong and Ji, Yuxiang and Ye, Qianwen and Tan, Zhuoyue and Wu, Liaoni},
  title     = {Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9921--9932}
}
Q-Eval-100K: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content-
[pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zicheng and Kou, Tengchuan and Wang, Shushi and Li, Chunyi and Sun, Wei and Wang, Wei and Li, Xiaoyu and Wang, Zongyu and Cao, Xuezhi and Min, Xiongkuo and Liu, Xiaohong and Zhai, Guangtao},
  title     = {{Q-Eval-100K}: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10621--10631}
}
Dual Focus-Attention Transformer for Robust Point Cloud Registration-
[pdf]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Kexue and Yuan, Mingzhi and Wang, Changwei and Pang, Weiguang and Chi, Jing and Wang, Manning and Gao, Longxiang},
  title     = {Dual Focus-Attention Transformer for Robust Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11769--11778}
}
Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Tianhao and Chen, Han and Hu, Juncheng and Zhu, Yungang and Li, Ximing},
  title     = {Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20592--20601}
}
Progress-Aware Video Frame Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Zihui and An, Joungbin and Yang, Xitong and Grauman, Kristen},
  title     = {Progress-Aware Video Frame Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13639--13650}
}
SMTPD: A New Benchmark for Temporal Prediction of Social Media Popularity-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yijie and Zheng, Bolun and Zhu, Wei and Pan, Hangjia and Yao, Yuchen and Xu, Ning and Liu, Anan and Zhang, Quan and Yan, Chenggang},
  title     = {{SMTPD}: A New Benchmark for Temporal Prediction of Social Media Popularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18847--18857}
}
Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Changchang and Liu, Gaowen and Fleming, Charles and Yan, Yan},
  title     = {Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8321--8330}
}
Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Sharan_2025_CVPR,
  author    = {Sharan, S P and Choi, Minkyu and Shah, Sahil and Goel, Harsh and Omama, Mohammad and Chinchali, Sandeep},
  title     = {Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8395--8405}
}
Spherical Manifold Guided Diffusion Model for Panoramic Image Generation-
[pdf]
[supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiancheng and Xu, Mai and Li, Shengxi and Ma, Senmao and Deng, Xin and Jiang, Lai and Shen, Gang},
  title     = {Spherical Manifold Guided Diffusion Model for Panoramic Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5824--5834}
}
Learning on Model Weights using Tree Experts-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Horwitz_2025_CVPR,
  author    = {Horwitz, Eliahu and Cavia, Bar and Kahana, Jonathan and Hoshen, Yedid},
  title     = {Learning on Model Weights using Tree Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20468--20478}
}
Rethinking Query-based Transformer for Continual Image Segmentation-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yuchen and Shi, Cheng and Wang, Dingyou and Tang, Jiajin and Wei, Zhengxuan and Wu, Yu and Li, Guanbin and Yang, Sibei},
  title     = {Rethinking Query-based Transformer for Continual Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4595--4606}
}
Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Bharadwaj_2025_CVPR,
  author    = {Bharadwaj, Shashwath and Kitichotkul, Ruangrawee and Agarwal, Akshay and Goyal, Vivek K},
  title     = {Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11406--11415}
}
Towards Smart Point-and-Shoot Photography-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiawan and Zhou, Fei and Zhong, Zhipeng and Lin, Jiongzhi and Qiu, Guoping},
  title     = {Towards Smart Point-and-Shoot Photography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28242--28251}
}
SlideChat: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ying and Wang, Guoan and Ji, Yuanfeng and Li, Yanjun and Ye, Jin and Li, Tianbin and Hu, Ming and Yu, Rongshan and Qiao, Yu and He, Junjun},
  title     = {{SlideChat}: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5134--5143}
}
Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Qingchen and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30271--30280}
}
Towards Transformer-Based Aligned Generation with Self-Coherence Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shulei and Lin, Wang and Huang, Hai and Wang, Hanting and Cai, Sihang and Han, WenKang and Jin, Tao and Chen, Jingyuan and Sun, Jiacheng and Zhu, Jieming and Zhao, Zhou},
  title     = {Towards Transformer-Based Aligned Generation with Self-Coherence Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18455--18464}
}
Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Maracani_2025_CVPR,
  author    = {Maracani, Andrea and Ozkan, Savas and Cho, Sijun and Kim, Hyowon and Noh, Eunchung and Min, Jeongwon and Min, Cho Jung and Park, Dookun and Ozay, Mete},
  title     = {Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14516--14526}
}
DART: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Sang-Jun and Heo, Keun-Soo and Shin, Dong-Hee and Son, Young-Han and Oh, Ji-Hye and Kam, Tae-Eui},
  title     = {{DART}: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15580--15589}
}
On the Consistency of Video Large Language Models in Temporal Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Minjoon and Xiao, Junbin and Zhang, Byoung-Tak and Yao, Angela},
  title     = {On the Consistency of Video Large Language Models in Temporal Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13713--13722}
}
Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jiaming and Ma, Teli and Lin, Kun-Yu and Wang, Zifan and Qiu, Ronghe and Liang, Junwei},
  title     = {Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22551--22561}
}
Less is More: Efficient Model Merging with Binary Task Switch-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Biqing and Li, Fangyuan and Wang, Zhen and Gao, Junqi and Li, Dong and Ye, Peng and Zhou, Bowen},
  title     = {Less is More: Efficient Model Merging with Binary Task Switch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15265--15274}
}
One-Minute Video Generation with Test-Time Training-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Dalal_2025_CVPR,
  author    = {Dalal, Karan and Koceja, Daniel and Xu, Jiarui and Zhao, Yue and Han, Shihao and Cheung, Ka Chun and Kautz, Jan and Choi, Yejin and Sun, Yu and Wang, Xiaolong},
  title     = {One-Minute Video Generation with Test-Time Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17702--17711}
}
InteractionMap: Improving Online Vectorized HDMap Construction with Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kuang and Yang, Chuan and Li, Zhanbin},
  title     = {{InteractionMap}: Improving Online Vectorized {HDMap} Construction with Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17176--17186}
}
Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {Text-guided Sparse Voxel Pruning for Efficient {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3666--3675}
}
ROCKET-1: Mastering Open-World Interaction with Visual-Temporal Context Prompting-
[pdf]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Shaofei and Wang, Zihao and Lian, Kewei and Mu, Zhancun and Ma, Xiaojian and Liu, Anji and Liang, Yitao},
  title     = {{ROCKET-1}: Mastering Open-World Interaction with Visual-Temporal Context Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12122--12131}
}
Common3D: Self-Supervised Learning of 3D Morphable Models for Common Objects in Neural Feature Space-
[pdf]
[supp]
[bibtex]
@InProceedings{Sommer_2025_CVPR,
  author    = {Sommer, Leonhard and D{\"u}nkel, Olaf and Theobalt, Christian and Kortylewski, Adam},
  title     = {{Common3D}: Self-Supervised Learning of {3D} Morphable Models for Common Objects in Neural Feature Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6468--6479}
}
RLAIF-V: Open-Source AI Feedback Leads to Super GPT-4V Trustworthiness-
[pdf]
[supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Tianyu and Zhang, Haoye and Li, Qiming and Xu, Qixin and Yao, Yuan and Chen, Da and Lu, Xiaoman and Cui, Ganqu and Dang, Yunkai and He, Taiwen and Feng, Xiaocheng and Song, Jun and Zheng, Bo and Liu, Zhiyuan and Chua, Tat-Seng and Sun, Maosong},
  title     = {{RLAIF-V}: Open-Source {AI} Feedback Leads to Super {GPT-4V} Trustworthiness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19985--19995}
}
ECVC: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Wei and Li, Junru and Zhang, Kai and Zhang, Li},
  title     = {{ECVC}: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7331--7341}
}
LinGen: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongjie and Ma, Chih-Yao and Liu, Yen-Cheng and Hou, Ji and Xu, Tao and Wang, Jialiang and Juefei-Xu, Felix and Luo, Yaqiao and Zhang, Peizhao and Hou, Tingbo and Vajda, Peter and Jha, Niraj K. and Dai, Xiaoliang},
  title     = {{LinGen}: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2578--2588}
}
EditSplat: Multi-View Fusion and Attention-Guided Optimization for View-Consistent 3D Scene Editing with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dong In and Park, Hyeongcheol and Seo, Jiyoung and Park, Eunbyung and Park, Hyunje and Baek, Ha Dam and Shin, Sangheon and Kim, Sangmin and Kim, Sangpil},
  title     = {{EditSplat}: Multi-View Fusion and Attention-Guided Optimization for View-Consistent {3D} Scene Editing with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11135--11145}
}
SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language-
[pdf]
[supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zehan and Zhou, Sashuai and He, Shaoxuan and Huang, Haifeng and Yang, Lihe and Zhang, Ziang and Cheng, Xize and Ji, Shengpeng and Jin, Tao and Zhao, Hengshuang and Zhao, Zhou},
  title     = {{SpatialCLIP}: Learning {3D}-aware Image Representations from Spatially Discriminative Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29656--29666}
}
Mono2Stereo: A Benchmark and Empirical Study for Stereo Conversion-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Songsong and Chen, Yuxin and Qi, Zhongang and Xie, Zeke and Wang, Yifan and Wang, Lijun and Shan, Ying and Lu, Huchuan},
  title     = {{Mono2Stereo}: A Benchmark and Empirical Study for Stereo Conversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21847--21856}
}
Towards Open-Vocabulary Audio-Visual Event Localization-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jinxing and Guo, Dan and Guo, Ruohao and Mao, Yuxin and Hu, Jingjing and Zhong, Yiran and Chang, Xiaojun and Wang, Meng},
  title     = {Towards Open-Vocabulary Audio-Visual Event Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8362--8371}
}
One-shot 3D Object Canonicalization based on Geometric and Semantic Consistency-
[pdf]
[supp]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Li and Wang, Yujie and Chen, Wenzheng and Dai, Qiyu and Gao, Qingzhe and Qin, Xueying and Chen, Baoquan},
  title     = {One-shot {3D} Object Canonicalization based on Geometric and Semantic Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16850--16859}
}
Inst3D-LMM: Instance-Aware 3D Scene Understanding with Multi-modal Instruction Tuning-
[pdf]
[supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hanxun and Li, Wentong and Wang, Song and Chen, Junbo and Zhu, Jianke},
  title     = {{Inst3D-LMM}: Instance-Aware {3D} Scene Understanding with Multi-modal Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14147--14157}
}
S2Gaussian: Sparse-View Super-Resolution 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Yecong and Shao, Mingwen and Cheng, Yuanshuo and Zuo, Wangmeng},
  title     = {{S2Gaussian}: Sparse-View Super-Resolution {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {711--721}
}
HIIF: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuxuan and Kwan, Ho Man and Peng, Tianhao and Gao, Ge and Zhang, Fan and Zhu, Xiaoqing and Sole, Joel and Bull, David},
  title     = {{HIIF}: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2289--2299}
}
Motion Prompting: Controlling Video Generation with Motion Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Daniel and Herrmann, Charles and Hur, Junhwa and Cole, Forrester and Zhang, Serena and Pfaff, Tobias and Lopez-Guevara, Tatiana and Aytar, Yusuf and Rubinstein, Michael and Sun, Chen and Wang, Oliver and Owens, Andrew and Sun, Deqing},
  title     = {Motion Prompting: Controlling Video Generation with Motion Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1--12}
}
VERA: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Muchao and Liu, Weiyang and He, Pan},
  title     = {{VERA}: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8679--8688}
}
ProHOC: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wallin_2025_CVPR,
  author    = {Wallin, Erik and Kahl, Fredrik and Hammarstrand, Lars},
  title     = {{ProHOC}: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20612--20621}
}
CCIN: Compositional Conflict Identification and Neutralization for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Likai and Zhao, Jian and Hu, Zechao and Yang, Zhengwei and Li, Hao and Jin, Lei and Wang, Zheng and Li, Xuelong},
  title     = {{CCIN}: Compositional Conflict Identification and Neutralization for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3974--3983}
}
CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu},
  title     = {{CLIP} is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15172--15182}
}
OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_CVPR, author = {Lou, Meng and Yu, Yizhou}, title = {OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {128-138} }
SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xinrui and Guo, Lanqing and Wang, Xiyu and Huang, Siyu and Wen, Bihan}, title = {SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23217-23226} }
Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ruiheng and Chen, Haozhe and Zhao, Boyao and Chen, Kejiang and Zhang, Weiming}, title = {Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20169-20178} }
VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding-
[pdf]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yujie and Hu, Xiaobin and Jiang, Boyuan and Luo, Donghao and Peng, Xu and Wu, Kai and Xu, Chengming and Han, Wenhui and Jin, Taisong and Wang, Chengjie and Ji, Rongrong}, title = {VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22616-22626} }
Interleaved-Modal Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Jun and Li, Yongqi and Cao, Ziqiang and Li, Wenjie}, title = {Interleaved-Modal Chain-of-Thought}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19520-19529} }
Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhifei and Xu, Tianshuo and Ge, Wenhang and Wu, Leyi and Yan, Dongyu and He, Jing and Wang, Luozhou and Zeng, Lu and Zhang, Shunsi and Chen, Ying-Cong}, title = {Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26504-26513} }
Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shixin and He, Chaoxiang and Ma, Xiaojing and Zhu, Bin Benjamin and Wang, Shuo and Hu, Hongsheng and Zhang, Dongmei and Yu, Linchen}, title = {Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20685-20694} }
POSTA: A Go-to Framework for Customized Artistic Poster Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Haoyu and Xu, Xiaojie and Li, Wenbo and Ren, Jingjing and Ye, Tian and Liu, Songhua and Chen, Ying-Cong and Zhu, Lei and Wang, Xinchao}, title = {POSTA: A Go-to Framework for Customized Artistic Poster Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28694-28704} }
NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery-
[pdf]
[supp]
[bibtex]@InProceedings{Kneeland_2025_CVPR, author = {Kneeland, Reese and Scotti, Paul S. and St-Yves, Ghislain and Breedlove, Jesse and Kay, Kendrick and Naselaris, Thomas}, title = {NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28852-28862} }
VLsI: Verbalized Layers-to-Interactions from Large to Small Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Byung-Kwan and Hachiuma, Ryo and Wang, Yu-Chiang Frank and Ro, Yong Man and Wu, Yueh-Hua}, title = {VLsI: Verbalized Layers-to-Interactions from Large to Small Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29545-29557} }
O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sharifdeen_2025_CVPR, author = {Sharifdeen, Ashshak and Munir, Muhammad Akhtar and Baliah, Sanoojan and Khan, Salman and Khan, Muhammad Haris}, title = {O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19942-19951} }
Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majhi_2025_CVPR, author = {Majhi, Snehashis and D'Amicantonio, Giacomo and Dantcheva, Antitza and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bondarev, Egor and Br\'emond, Fran\c{c}ois}, title = {Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24265-24274} }
Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liyan and Meyer, Gregory P. and Zhang, Zaiwei and Wolff, Eric M. and Vernaza, Paul}, title = {Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6595-6604} }
Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhuoran and Yang, Linlin and Sun, Pengzhan and Hui, Pan and Yao, Angela}, title = {Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12255-12265} }
Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Shijie and Ren, Hui and Weng, Yijia and Zhang, Shuwang and Wang, Zhen and Xu, Dejia and Fan, Zhiwen and You, Suya and Wang, Zhangyang and Guibas, Leonidas and Kadambi, Achuta}, title = {Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14179-14190} }
Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Junying and Liu, Jingyuan and Sun, Xin and Singh, Krishna Kumar and Shu, Zhixin and Zhang, He and Yang, Jimei and Zhao, Nanxuan and Wang, Tuanfeng Y. and Chen, Simon S. and Neumann, Ulrich and Yoon, Jae Shin}, title = {Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {380-390} }
Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Jin-Liang and Huang, Ting-Zhu and Deng, Liang-Jian and Lin, Guang and Cao, Zihan and Li, Chao and Zhao, Qibin}, title = {Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12669-12678} }
EASEMVC: Efficient Dual Selection Mechanism for Deep Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Baili and Dong, Zhibin and Liang, Ke and Liu, Suyuan and Wang, Siwei and Liu, Tianrui and Hu, Xingchen and Zhu, En and Liu, Xinwang}, title = {EASEMVC: Efficient Dual Selection Mechanism for Deep Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20716-20726} }
Efficient Motion-Aware Video MLLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zijia and Huo, Yuqi and Yue, Tongtian and Guo, Longteng and Lu, Haoyu and Wang, Bingning and Chen, Weipeng and Liu, Jing}, title = {Efficient Motion-Aware Video MLLM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24159-24168} }
DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Jingzhou and Liu, Yang and Chen, Weixing and Li, Zhen and Wang, Yaowei and Li, Guanbin and Lin, Liang}, title = {DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14169-14178} }
Zero-Shot 4D Lidar Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yushan and O\v{s}ep, Aljo\v{s}a and Leal-Taix\'e, Laura and Meinhardt, Tim}, title = {Zero-Shot 4D Lidar Panoptic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24506-24517} }
MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guedon_2025_CVPR, author = {Gu\'edon, Antoine and Ichikawa, Tomoki and Yamashita, Kohei and Nishino, Ko}, title = {MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6001-6011} }
Extreme Rotation Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bezalel_2025_CVPR, author = {Bezalel, Hana and Ankri, Dotan and Cai, Ruojin and Averbuch-Elor, Hadar}, title = {Extreme Rotation Estimation in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1061-1070} }
ADU: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Yushan and Li, Guowen and Liang, Haoyuan and Zheng, Juepeng and Ye, Zhiyu}, title = {ADU: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30588-30598} }
EmotiveTalk: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haotian and Weng, Yuzhe and Li, Yueyan and Guo, Zilu and Du, Jun and Niu, Shutong and Ma, Jiefeng and He, Shan and Wu, Xiaoyan and Hu, Qiming and Yin, Bing and Liu, Cong and Liu, Qingfeng}, title = {EmotiveTalk: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26212-26221} }
Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuhan and Bi, Suzhi and Zhang, Ying-Jun Angela and Yuan, Xiaojun}, title = {Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2377-2386} }
IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jingyi and Tu, Siwei and Yang, Weidong and Fei, Ben and Li, Shuhao and Liu, Keyi and Luo, Yeqi and Ma, Lipeng and Bai, Lei}, title = {IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10567-10576} }
DTOS: Dynamic Time Object Sensing with Large Multimodal Model-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jirui and Zhang, Jinrong and Liu, Shenglan and Xu, Luhao and Huang, Zhixiong and Huang, Gao}, title = {DTOS: Dynamic Time Object Sensing with Large Multimodal Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13810-13820} }
How to Merge Your Multimodal Models Over Time?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dziadzio_2025_CVPR, author = {Dziadzio, Sebastian and Udandarao, Vishaal and Roth, Karsten and Prabhu, Ameya and Akata, Zeynep and Albanie, Samuel and Bethge, Matthias}, title = {How to Merge Your Multimodal Models Over Time?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20479-20491} }
Identifying and Mitigating Position Bias of Multi-image Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and Zhang, Jing}, title = {Identifying and Mitigating Position Bias of Multi-image Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10599-10609} }
Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lenz_2025_CVPR, author = {Lenz, Tim and Neidlinger, Peter and Ligero, Marta and W\"olflein, Georg and van Treeck, Marko and Kather, Jakob N.}, title = {Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30807-30817} }
Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhiwei and Meng, Yucong and Fu, Kexue and Tang, Feilong and Wang, Shuo and Song, Zhijian}, title = {Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20223-20232} }
UNIALIGN: Scaling Multimodal Alignment within One Unified Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Bo and Li, Liulei and Wang, Yujia and Liu, Huafeng and Yao, Yazhou and Wang, Wenguan}, title = {UNIALIGN: Scaling Multimodal Alignment within One Unified Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29644-29655} }
ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{Soucek_2025_CVPR, author = {Sou\v{c}ek, Tom\'a\v{s} and Gatti, Prajwal and Wray, Michael and Laptev, Ivan and Damen, Dima and Sivic, Josef}, title = {ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27435-27445} }
Exploration-Driven Generative Interactive Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Savov_2025_CVPR, author = {Savov, Nedko and Kazemi, Naser and Mahdi, Mohammad and Paudel, Danda Pani and Wang, Xi and Van Gool, Luc}, title = {Exploration-Driven Generative Interactive Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27597-27607} }
Task-Agnostic Guided Feature Expansion for Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Bowen and Zhou, Da-Wei and Ye, Han-Jia and Zhan, De-Chuan}, title = {Task-Agnostic Guided Feature Expansion for Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10099-10109} }
ShowUI: One Vision-Language-Action Model for GUI Visual Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kevin Qinghong and Li, Linjie and Gao, Difei and Yang, Zhengyuan and Wu, Shiwei and Bai, Zechen and Lei, Stan Weixian and Wang, Lijuan and Shou, Mike Zheng}, title = {ShowUI: One Vision-Language-Action Model for GUI Visual Agent}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19498-19508} }
Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xiumei and Huang, Zikai and Xu, Wenhao and Xiao, Peng and Xu, Xuemiao and Zhang, Huaidong}, title = {Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5467-5476} }
Twinner: Shining Light on Digital Twins in a Few Snaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zarzar_2025_CVPR, author = {Zarzar, Jesus and Monnier, Tom and Shapovalov, Roman and Vedaldi, Andrea and Novotny, David}, title = {Twinner: Shining Light on Digital Twins in a Few Snaps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5859-5869} }
Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Jian and Liu, Jinlai and Jiang, Yi and Yan, Bin and Zhang, Yuqi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing}, title = {Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15733-15744} }
DreamText: High Fidelity Scene Text Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yibin and Zhang, Weizhong and Xu, Honghui and Jin, Cheng}, title = {DreamText: High Fidelity Scene Text Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28555-28563} }
MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2025_CVPR, author = {Parihar, Rishubh and Sarkar, Srinjay and Vora, Sarthak and Kundu, Jogendra Nath and Babu, R. Venkatesh}, title = {MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6531-6541} }
HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Boyuan and Wang, Xiaofeng and Ni, Chaojun and Zhao, Guosheng and Yang, Zhiqin and Zhu, Zheng and Zhang, Muyang and Zhou, Yukun and Chen, Xinze and Huang, Guan and Liu, Lihong and Wang, Xingang}, title = {HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12391-12401} }
ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hannan_2025_CVPR, author = {Hannan, Tanveer and Islam, Md Mohaiminul and Gu, Jindong and Seidl, Thomas and Bertasius, Gedas}, title = {ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19012-19022} }
ArtiFade: Learning to Generate High-quality Subject from Blemished Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Shuya and Hao, Shaozhe and Cao, Yukang and Wong, Kwan-Yee K.}, title = {ArtiFade: Learning to Generate High-quality Subject from Blemished Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13167-13177} }
SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Xie, Junyuan}, title = {SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5793-5803} }
Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Haotong and Peng, Sida and Chen, Jingxiao and Peng, Songyou and Sun, Jiaming and Liu, Minghuan and Bao, Hujun and Feng, Jiashi and Zhou, Xiaowei and Kang, Bingyi}, title = {Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17070-17080} }
ProKeR: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bendou_2025_CVPR, author = {Bendou, Yassir and Ouasfi, Amine and Gripon, Vincent and Boukhayma, Adnane}, title = {ProKeR: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25092-25102} }
Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karypidis_2025_CVPR, author = {Karypidis, Efstathios and Kakogeorgiou, Ioannis and Gidaris, Spyros and Komodakis, Nikos}, title = {Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3793-3803} }
GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Enguang and Peng, Zhimao and Xie, Zhengyuan and Yang, Fei and Liu, Xialei and Cheng, Ming-Ming}, title = {GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20296-20306} }
On the Out-Of-Distribution Generalization of Large Multimodal Models-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xingxuan and Li, Jiansheng and Chu, Wenjing and Hai, Junjia and Xu, Renzhe and Yang, Yuqing and Guan, Shikai and Xu, Jiazheng and Jing, Liping and Cui, Peng}, title = {On the Out-Of-Distribution Generalization of Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10315-10326} }
Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kang and Ma, Zhuoqi and Kang, Xiaolu and Li, Yunan and Xie, Kun and Jiao, Zhicheng and Miao, Qiguang}, title = {Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10348-10359} }
MonoTAKD: Teaching Assistant Knowledge Distillation for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hou-I and Wu, Christine and Cheng, Jen-Hao and Chai, Wenhao and Wang, Shian-Yun and Liu, Gaowen and Latapie, Hugo and Wu, Jhih-Ciang and Hwang, Jenq-Neng and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {MonoTAKD: Teaching Assistant Knowledge Distillation for Monocular 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22266-22275} }
Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Xingguo and Dong, Xingbo and Wang, Liwen and Yang, Jiewen and Zhao, Lei and Pu, Bin and Jin, Zhe and Li, Xuejun}, title = {Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15621-15631} }
Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ye and Hu, Zhangli and Zhao, Zhongyin and Zhu, Yupeng and Shi, Yue and Xiong, Yuxuan and Ni, Bingbing}, title = {Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23345-23354} }
Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Junyuan and Wu, Xinyi and Yang, Yongxing and Zhu, Congchao and Wang, Song and Wu, Zhenyao}, title = {Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23195-23206} }
DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_CVPR, author = {Hou, Sizai and Li, Songze and Yao, Duanyi}, title = {DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20675-20684} }
Towards Scalable Human-aligned Benchmark for Text-guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ryu_2025_CVPR, author = {Ryu, Suho and Kim, Kihyun and Baek, Eugene and Shin, Dongsoo and Lee, Joonseok}, title = {Towards Scalable Human-aligned Benchmark for Text-guided Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18292-18301} }
Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Zhangqi and Chen, Junkai and Zhu, Beier and Luo, Tingjin and Shen, Yankun and Yang, Xu}, title = {Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25004-25014} }
SpectroMotion: Dynamic 3D Reconstruction of Specular Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Cheng-De and Chang, Chen-Wei and Liu, Yi-Ruei and Lee, Jie-Ying and Huang, Jiun-Long and Tseng, Yu-Chee and Liu, Yu-Lun}, title = {SpectroMotion: Dynamic 3D Reconstruction of Specular Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21328-21338} }
Scaling Inference Time Compute for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Nanye and Tong, Shangyuan and Jia, Haolin and Hu, Hexiang and Su, Yu-Chuan and Zhang, Mingda and Yang, Xuan and Li, Yandong and Jaakkola, Tommi and Jia, Xuhui and Xie, Saining}, title = {Scaling Inference Time Compute for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2523-2534} }
Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Zichen and Chen, Wei and Qiu, Qiang}, title = {Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20146-20157} }
VTON 360: High-Fidelity Virtual Try-On from Any Viewing Direction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Zijian and Ning, Yuwei and Qin, Yipeng and Wang, Guangrun and Yang, Sibei and Lin, Liang and Li, Guanbin}, title = {VTON 360: High-Fidelity Virtual Try-On from Any Viewing Direction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26388-26398} }
MVBoost: Boost 3D Reconstruction with Multi-View Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xiangyu and Zhang, Xiaomei and Ma, Zhiyuan and Zhu, Xiangyu and Lei, Zhen}, title = {MVBoost: Boost 3D Reconstruction with Multi-View Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21664-21673} }
Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Yang and Ji, Yucheng and Cao, Min and Wang, Jinqiao and Ye, Mang}, title = {Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3952-3962} }
Category-Agnostic Neural Object Rigging-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Guangzhao and Geng, Chen and Wu, Shangzhe and Wu, Jiajun}, title = {Category-Agnostic Neural Object Rigging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22078-22088} }
AVF-MAE++: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xuecheng and Sun, Heli and Wang, Yifan and Nie, Jiayu and Zhang, Jie and Wang, Yabing and Xue, Junxiao and He, Liang},
  title     = {{AVF-MAE++}: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9142--9153}
}
POPEN: Preference-Based Optimization and Ensemble for LVLM-Based Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Xu, Qianxiong and Liu, Xuanyi and Ji, Deyi and Wu, Haiyang and Soh, De Wen and Liu, Jun},
  title     = {{POPEN}: Preference-Based Optimization and Ensemble for {LVLM}-Based Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30231--30240}
}
DiffusionSfM: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Qitao and Lin, Amy and Tan, Jeff and Zhang, Jason Y. and Ramanan, Deva and Tulsiani, Shubham},
  title     = {{DiffusionSfM}: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6317--6326}
}
GroundingFace: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model-
[pdf]
[bibtex]@InProceedings{Han_2025_CVPR,
  author    = {Han, Yue and Zhang, Jiangning and Zhu, Junwei and Hou, Runze and Ji, Xiaozhong and Lin, Chuming and Hu, Xiaobin and Xue, Zhucun and Liu, Yong},
  title     = {{GroundingFace}: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3942--3951}
}
Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Baade_2025_CVPR,
  author    = {Baade, Alan and Chen, Changan},
  title     = {Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16753--16763}
}
MMAudio: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Ho Kei and Ishii, Masato and Hayakawa, Akio and Shibuya, Takashi and Schwing, Alexander and Mitsufuji, Yuki},
  title     = {{MMAudio}: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28901--28911}
}
CryptoFace: End-to-End Encrypted Face Recognition-
[pdf]
[bibtex]@InProceedings{Ao_2025_CVPR,
  author    = {Ao, Wei and Boddeti, Vishnu Naresh},
  title     = {{CryptoFace}: End-to-End Encrypted Face Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19197--19206}
}
Relation-Rich Visual Document Generator for Visual Information Extraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zi-Han and Lin, Chien-Wei and Li, Wei-Hua and Liu, Hsuan-Tung and Yeh, Yi-Ren and Chen, Chu-Song},
  title     = {Relation-Rich Visual Document Generator for Visual Information Extraction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14449--14459}
}
DynFocus: Dynamic Cooperative Network Empowers LLMs with Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR,
  author    = {Han, Yudong and Guo, Qingpei and Pan, Liyuan and Liu, Liu and Guan, Yu and Yang, Ming},
  title     = {{DynFocus}: Dynamic Cooperative Network Empowers {LLMs} with Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8512--8522}
}
Mimic In-Context Learning for Multimodal Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuchu and Fu, Jiale and Hao, Chenduo and Hu, Xinting and Peng, Yingzhe and Geng, Xin and Yang, Xu},
  title     = {Mimic In-Context Learning for Multimodal Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29825--29835}
}
PromptHash: Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Qiang and Cheng, Shuli and Chen, Jiayi},
  title     = {{PromptHash}: Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19649--19658}
}
V-Stylist: Video Stylization via Collaboration and Reflection of MLLM Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Zhengrong and Zhuang, Shaobin and Li, Kunchang and Ding, Yanbo and Wang, Yali},
  title     = {{V-Stylist}: Video Stylization via Collaboration and Reflection of {MLLM} Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3195--3205}
}
Vision-Language Models Do Not Understand Negation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alhamoud_2025_CVPR,
  author    = {Alhamoud, Kumail and Alshammari, Shaden and Tian, Yonglong and Li, Guohao and Torr, Philip H. S. and Kim, Yoon and Ghassemi, Marzyeh},
  title     = {Vision-Language Models Do Not Understand Negation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29612--29622}
}
ID-Patch: Robust ID Association for Group Photo Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yimeng and Zhi, Tiancheng and Liu, Jing and Sang, Shen and Jiang, Liming and Yan, Qing and Liu, Sijia and Luo, Linjie},
  title     = {{ID-Patch}: Robust {ID} Association for Group Photo Personalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2986--2996}
}
iG-6DoF: Model-free 6DoF Pose Estimation for Unseen Object via Iterative 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Tuo and Luo, Fei and Qin, Jiongming and Jiang, Yu and Wang, Yusen and Xiao, Chunxia},
  title     = {{iG-6DoF}: Model-free {6DoF} Pose Estimation for Unseen Object via Iterative {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6436--6446}
}
NexusGS: Sparse View Synthesis with Epipolar Depth Priors in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yulong and Jiang, Zicheng and He, Shengfeng and Sun, Yandu and Dong, Junyu and Zhang, Huaidong and Du, Yong},
  title     = {{NexusGS}: Sparse View Synthesis with Epipolar Depth Priors in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26800--26809}
}
HyperNet Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hedlin_2025_CVPR,
  author    = {Hedlin, Eric and Hayat, Munawar and Porikli, Fatih and Yi, Kwang Moo and Mahajan, Shweta},
  title     = {{HyperNet} Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22129--22138}
}
Universal Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shengqiong and Fei, Hao and Chua, Tat-seng},
  title     = {Universal Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14158--14168}
}
RICCARDO: Radar Hit Prediction and Convolution for Camera-Radar 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_CVPR,
  author    = {Long, Yunfei and Kumar, Abhinav and Liu, Xiaoming and Morris, Daniel},
  title     = {{RICCARDO}: Radar Hit Prediction and Convolution for Camera-Radar {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22276--22285}
}
ForestLPR: LiDAR Place Recognition in Forests Attentioning Multiple BEV Density Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yanqing and Tuna, Turcan and Hutter, Marco and Cadena, Cesar and Zheng, Nanning},
  title     = {{ForestLPR}: {LiDAR} Place Recognition in Forests Attentioning Multiple {BEV} Density Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6659--6669}
}
BLADE: Single-view Body Mesh Estimation through Accurate Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Jiefeng and Li, Tianye and Yuan, Ye and Fuchs, Henry and Nagano, Koki and De Mello, Shalini and Stengel, Michael},
  title     = {{BLADE}: Single-view Body Mesh Estimation through Accurate Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21991--22000}
}
AdaMMS: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR,
  author    = {Du, Yiyang and Wang, Xiaochen and Chen, Chi and Ye, Jiabo and Wang, Yiru and Li, Peng and Yan, Ming and Zhang, Ji and Huang, Fei and Sui, Zhifang and Sun, Maosong and Liu, Yang},
  title     = {{AdaMMS}: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9413--9422}
}
MoEE: Mixture of Emotion Experts for Audio-Driven Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huaize and Sun, Wenzhang and Di, Donglin and Sun, Shibo and Yang, Jiahui and Zou, Changqing and Bao, Hujun},
  title     = {{MoEE}: Mixture of Emotion Experts for Audio-Driven Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26222--26231}
}
ReCap: Better Gaussian Relighting with Cross-Environment Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jingzhi and Wu, Zongwei and Zamfir, Eduard and Timofte, Radu},
  title     = {{ReCap}: Better {Gaussian} Relighting with Cross-Environment Captures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21307--21316}
}
Split Adaptation for Pre-trained Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lixu and Shang, Bingqi and Li, Yi and Mohapatra, Payal and Dong, Wei and Wang, Xiao and Zhu, Qi},
  title     = {Split Adaptation for Pre-trained Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20092--20102}
}
SpatialLLM: A Compound 3D-Informed Design towards Spatially-Intelligent Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Wufei and Ye, Luoxin and de Melo, Celso M. and Yuille, Alan and Chen, Jieneng},
  title     = {{SpatialLLM}: A Compound {3D}-Informed Design towards Spatially-Intelligent Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17249--17260}
}
SLVR: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation-
[pdf]
[bibtex]@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Ning and Zhang, Libao},
  title     = {{SLVR}: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {400--410}
}
Vision-Language Embodiment for Monocular Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinchang and Lu, Guoyu},
  title     = {Vision-Language Embodiment for Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29479--29489}
}
Layered Image Vectorization via Semantic Simplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenyu and Huang, Jianxi and Sun, Zhida and Gong, Yuanhao and Cohen-Or, Daniel and Lu, Min},
  title     = {Layered Image Vectorization via Semantic Simplification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7728--7738}
}
Learning Occlusion-Robust Vision Transformers for Real-Time UAV Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, You and Wang, Xucheng and Yang, Xiangyang and Liu, Mengyuan and Zeng, Dan and Ye, Hengzhou and Li, Shuiwang},
  title     = {Learning Occlusion-Robust Vision Transformers for Real-Time {UAV} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17103--17113}
}
Plug-and-Play Versatile Compressed Video Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Huimin and Li, Jiacheng and Xiong, Zhiwei},
  title     = {Plug-and-Play Versatile Compressed Video Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17767--17777}
}
UltraFusion: Ultra High Dynamic Imaging using Exposure Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Wang, Yujin and Cai, Xin and You, Zhiyuan and Lu, Zheming and Zhang, Fan and Guo, Shi and Xue, Tianfan},
  title     = {{UltraFusion}: Ultra High Dynamic Imaging using Exposure Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16111--16121}
}
Hearing Anywhere in Any Environment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual, Sebastia V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan},
  title     = {Hearing Anywhere in Any Environment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5732--5741}
}
Automated Proof of Polynomial Inequalities via Reinforcement Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Banglong and Qi, Niuniu and Zeng, Xia and Dehbi, Lydia and Yang, Zhengfeng},
  title     = {Automated Proof of Polynomial Inequalities via Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5052--5060}
}
Noise-Resistant Video Anomaly Detection via RGB Error-Guided Multiscale Predictive Coding and Dynamic Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Han and Du, Wenli and Liao, Peng and Wang, Bing and Fan, Siyuan},
  title     = {Noise-Resistant Video Anomaly Detection via {RGB} Error-Guided Multiscale Predictive Coding and Dynamic Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19109--19119}
}
Frequency Dynamic Convolution for Dense Image Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Linwei and Gu, Lin and Li, Liang and Yan, Chenggang and Fu, Ying},
  title     = {Frequency Dynamic Convolution for Dense Image Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30178--30188}
}
IDEA: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhao and Lv, Yongfeng and Zhang, Pingping and Lu, Huchuan},
  title     = {{IDEA}: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29701--29710}
}
SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2% Training Cost-
[pdf]
[supp]
[bibtex]@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Haiyang and Zhang, Pengyu and Shou, Mike Zheng},
  title     = {{SAM-I2V}: Upgrading {SAM} to Support Promptable Video Segmentation with Less than 0.2\% Training Cost},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3417--3426}
}
GroupMamba: Efficient Group-Based Visual State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shaker_2025_CVPR,
  author    = {Shaker, Abdelrahman and Wasim, Syed Talal and Khan, Salman and Gall, Juergen and Khan, Fahad Shahbaz},
  title     = {{GroupMamba}: Efficient Group-Based Visual State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14912--14922}
}
How Do I Do That? Synthesizing 3D Hand Motion and Contacts for Everyday Interactions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Prakash_2025_CVPR,
  author    = {Prakash, Aditya and Lundell, Benjamin and Andreychuk, Dmitry and Forsyth, David and Gupta, Saurabh and Sawhney, Harpreet},
  title     = {How Do {I} Do That? {Synthesizing} {3D} Hand Motion and Contacts for Everyday Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7026--7036}
}
Escaping Plato's Cave: Towards the Alignment of 3D and Text Latent Spaces-
[pdf]
[supp]
[bibtex]@InProceedings{Hadgi_2025_CVPR,
  author    = {Hadgi, Souhail and Moschella, Luca and Santilli, Andrea and Gomez, Diego and Huang, Qixing and Rodol{\`a}, Emanuele and Melzi, Simone and Ovsjanikov, Maks},
  title     = {Escaping {Plato's} Cave: Towards the Alignment of {3D} and Text Latent Spaces},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19825--19835}
}
Consistency Posterior Sampling for Diverse Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Purohit_2025_CVPR,
  author    = {Purohit, Vishal and Repasky, Matthew and Lu, Jianfeng and Qiu, Qiang and Xie, Yao and Cheng, Xiuyuan},
  title     = {Consistency Posterior Sampling for Diverse Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28327--28336}
}
IMFine: 3D Inpainting via Geometry-guided Multi-view Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Zhihao and Huo, Dong and Zhou, Yuhongze and Min, Yan and Lu, Juwei and Zuo, Xinxin},
  title     = {{IMFine}: {3D} Inpainting via Geometry-guided Multi-view Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26694--26703}
}
ActiveGAMER: Active GAussian Mapping through Efficient Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liyan and Zhan, Huangying and Chen, Kevin and Xu, Xiangyu and Yan, Qingan and Cai, Changjiang and Xu, Yi},
  title     = {{ActiveGAMER}: Active {GAussian} Mapping through Efficient Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16486--16497}
}
DeepCompress-ViT: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge-
[pdf]
[bibtex]@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sabbir and Al Arafat, Abdullah and Najafi, Deniz and Mahmood, Akhlak and Rizve, Mamshad Nayeem and Al Nahian, Mohaiminul and Zhou, Ranyang and Angizi, Shaahin and Rakin, Adnan Siraj},
  title     = {{DeepCompress-ViT}: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30147--30156}
}
EvOcc: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory-
[pdf]
[supp]
[bibtex]@InProceedings{Kalble_2025_CVPR,
  author    = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy},
  title     = {{EvOcc}: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27467--27476}
}
Positive2Negative: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Tong and Wang, Lizhi and Xu, Zhiyuan and Zhu, Lin and Lu, Wanxuan and Huang, Hua},
  title     = {{Positive2Negative}: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17924--17934}
}
Towards Continual Universal Segmentation-
[pdf]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zihan and Wang, Zilei and Wang, Xu},
  title     = {Towards Continual Universal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29417--29427}
}
PGC: Physics-Based Gaussian Cloth from a Single Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Michelle and Chiang, Matt Jen-Yuan and Santesteban, Igor and Sarafianos, Nikolaos and Chen, Hsiao-yu and Halimi, Oshri and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Saito, Shunsuke and Wu, Jiajun and Liu, C. Karen and Stuyck, Tuur and Larionov, Egor},
  title     = {{PGC}: Physics-Based {Gaussian} Cloth from a Single Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21215--21225}
}
Joint Vision-Language Social Bias Removal for CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haoyu and Guo, Yangyang and Kankanhalli, Mohan},
  title     = {Joint Vision-Language Social Bias Removal for {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4246--4255}
}
MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Zhenggang and Fan, Yuchen and Wang, Dilin and Xu, Hongyu and Ranjan, Rakesh and Schwing, Alexander and Yan, Zhicheng},
  title     = {{MV-DUSt3R+}: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5283--5293}
}
Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zaoming and Lei, Pengcheng and Wang, Tingting and Fang, Faming and Zhang, Junkang and Huang, Yaomin and Song, Haichuan},
  title     = {Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1994--2004}
}
OFER: Occluded Face Expression Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Selvaraju_2025_CVPR,
  author    = {Selvaraju, Pratheba and Abrevaya, Victoria Fernandez and Bolkart, Timo and Akkerman, Rick and Ding, Tianyu and Amjadi, Faezeh and Zharkov, Ilya},
  title     = {{OFER}: Occluded Face Expression Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26985--26995}
}
SeaLion: Semantic Part-Aware Latent Point Diffusion Models for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Dekai and Di, Yan and Gavranovic, Stefan and Ilic, Slobodan},
  title     = {{SeaLion}: Semantic Part-Aware Latent Point Diffusion Models for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11789--11798}
}
MonSter: Marry Monodepth to Stereo Unleashes Power-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Junda and Liu, Longliang and Xu, Gangwei and Wang, Xianqi and Zhang, Zhaoxing and Deng, Yong and Zang, Jinliang and Chen, Yurui and Cai, Zhipeng and Yang, Xin},
  title     = {{MonSter}: Marry Monodepth to Stereo Unleashes Power},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6273--6282}
}
Toward Real-world BEV Perception: Depth Uncertainty Estimation via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting},
  title     = {Toward Real-world {BEV} Perception: Depth Uncertainty Estimation via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17124--17133}
}
Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shirkavand_2025_CVPR,
  author    = {Shirkavand, Reza and Yu, Peiran and Gao, Shangqian and Somepalli, Gowthami and Goldstein, Tom and Huang, Heng},
  title     = {Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18619--18629}
}
Cubify Anything: Scaling Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lazarow_2025_CVPR,
  author    = {Lazarow, Justin and Griffiths, David and Kohavi, Gefen and Crespo, Francisco and Dehghan, Afshin},
  title     = {Cubify Anything: Scaling Indoor {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22225--22233}
}
WildGS-SLAM: Monocular Gaussian Splatting SLAM in Dynamic Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Jianhao and Zhu, Zihan and Bieri, Valentin and Pollefeys, Marc and Peng, Songyou and Armeni, Iro},
  title     = {{WildGS-SLAM}: Monocular {Gaussian} Splatting {SLAM} in Dynamic Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11461--11471}
}
A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mildenberger_2025_CVPR,
  author    = {Mildenberger, David and Hager, Paul and Rueckert, Daniel and Menten, Martin J.},
  title     = {A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10305--10314}
}
DEAL: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhu and Wang, Zijun and Liu, Jinyuan and Meng, Fanqi and Ma, Long and Liu, Risheng},
  title     = {{DEAL}: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28198--28207}
}
RePerformer: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuheng and Shen, Zhehao and Guo, Chengcheng and Hong, Yu and Su, Zhuo and Zhang, Yingliang and Habermann, Marc and Xu, Lan},
  title     = {{RePerformer}: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11349--11360}
}
CheXWorld: Exploring Image World Modeling for Radiograph Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Yang and Wang, Yulin and Tao, Chenxin and Liu, Pan and Song, Shiji and Huang, Gao},
  title     = {{CheXWorld}: Exploring Image World Modeling for Radiograph Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20778--20788}
}
Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Xinshuai and Chen, Weixing and Liu, Yang and Chen, Weikai and Li, Guanbin and Lin, Liang},
  title     = {Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12078--12088}
}
Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yun and Hui, Le and Yang, Hang and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {Learning Class Prototypes for Unified Sparse-Supervised {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9911--9920}
}
BimArt: A Unified Approach for the Synthesis of 3D Bimanual Interaction with Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wanyue and Dabral, Rishabh and Golyanik, Vladislav and Choutas, Vasileios and Alvarado, Eduardo and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{BimArt}: A Unified Approach for the Synthesis of {3D} Bimanual Interaction with Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27694--27705}
}
Open-World Amodal Appearance Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ao_2025_CVPR,
  author    = {Ao, Jiayang and Jiang, Yanbei and Ke, Qiuhong and Ehinger, Krista A.},
  title     = {Open-World Amodal Appearance Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6490--6499}
}
AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jiarui and Duan, Huiyu and Zhai, Guangtao and Wang, Juntong and Min, Xiongkuo}, title = {AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18869-18880} }
CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yiqi and Wang, Ziyue and Zhang, Can and Li, Peng and Liu, Yang}, title = {CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29569-29579} }
Autoregressive Distillation of Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Yeongmin and Anagnostidis, Sotiris and Du, Yuming and Sch{\"o}nfeld, Edgar and Kohler, Jonas and Georgopoulos, Markos and Pumarola, Albert and Thabet, Ali and Sanakoyeu, Artsiom}, title = {Autoregressive Distillation of Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15745-15756} }
RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement-
[pdf]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Gang and Wang, Weiran and Quan, Guancheng and Wang, Shihao and Zhou, Dajiang and Li, Yunsong}, title = {RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7342-7352} }
OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Mingjie and Zhang, Jiyao and Wu, Tianshu and Zhao, Yinghao and Gao, Wenlong and Dong, Hao}, title = {OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17359-17369} }
FreeTimeGS: Free Gaussian Primitives at Anytime Anywhere for Dynamic Scene Reconstruction-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yifan and Yang, Peishan and Xu, Zhen and Sun, Jiaming and Zhang, Zhanhua and Chen, Yong and Bao, Hujun and Peng, Sida and Zhou, Xiaowei}, title = {FreeTimeGS: Free Gaussian Primitives at Anytime Anywhere for Dynamic Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21750-21760} }
Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models-
[pdf]
[supp]
[bibtex]@InProceedings{Benou_2025_CVPR, author = {Benou, Itay and Raviv, Tammy Riklin}, title = {Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30063-30072} }
DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Leqi and Gong, Guoqiang and Hao, Tianxiang and He, Tao and Zhang, Yifeng and Liu, Pengzhang and Zhao, Sicheng and Han, Jungong and Ding, Guiguang}, title = {DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19702-19712} }
Reanimating Images using Neural Representations of Dynamic Stimuli-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeung_2025_CVPR, author = {Yeung, Jacob and Luo, Andrew F. and Sarch, Gabriel and Henderson, Margaret M. and Ramanan, Deva and Tarr, Michael J.}, title = {Reanimating Images using Neural Representations of Dynamic Stimuli}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5331-5343} }
Visual-Instructed Degradation Diffusion for All-in-One Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Wenyang and Qin, Haina and Chen, Zewen and Wang, Libin and Zheng, Dandan and Li, Yuming and Liu, Yufan and Li, Bing and Hu, Weiming}, title = {Visual-Instructed Degradation Diffusion for All-in-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12764-12777} }
Insightful Instance Features for 3D Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Roh_2025_CVPR, author = {Roh, Wonseok and Jung, Hwanhee and Nam, Giljoo and Lee, Dong In and Park, Hyeongcheol and Yoon, Sang Ho and Joo, Jungseock and Kim, Sangpil}, title = {Insightful Instance Features for 3D Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14057-14067} }
Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shengqiong and Fei, Hao and Yang, Jingkang and Li, Xiangtai and Li, Juncheng and Zhang, Hanwang and Chua, Tat-seng}, title = {Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24539-24549} }
Knowledge Bridger: Towards Training-Free Missing Modality Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Guanzhou and He, Shengfeng and Wang, Xiaoli and Wang, Bo and Chao, Guoqing and Zhang, Yuanyang and Xie, Yi and Su, Hexing}, title = {Knowledge Bridger: Towards Training-Free Missing Modality Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25864-25873} }
EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cong_2025_CVPR, author = {Cong, Gaoxiang and Pan, Jiadong and Li, Liang and Qi, Yuankai and Peng, Yuxin and van den Hengel, Anton and Yang, Jian and Huang, Qingming}, title = {EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15863-15873} }
DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Wenbo and Gao, Xiangjun and Li, Xiaoyu and Zhao, Sijie and Cun, Xiaodong and Zhang, Yong and Quan, Long and Shan, Ying}, title = {DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2005-2015} }
TexGarment: Consistent Garment UV Texture Generation via Efficient 3D Structure-Guided Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jialun and Wu, Jinbo and Gao, Xiaobo and Hu, Jiakui and Xiong, Bojun and Liu, Xing and Zhao, Chen and Pei, Hongbin and Feng, Haocheng and Li, Yingying and Ding, Errui and Wang, Jingdong}, title = {TexGarment: Consistent Garment UV Texture Generation via Efficient 3D Structure-Guided Diffusion Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26566-26575} }
A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zheming and Liu, He and Lang, Congyan and Wang, Tao and Li, Yidong and Kampffmeyer, Michael C.}, title = {A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15528-15537} }
Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Qi and Li, Hao and Deng, Xiang and Shao, Rui and Li, Yinchuan and Hao, Jianye and Gao, Longxiang and Wang, Michael Yu and Nie, Liqiang}, title = {Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17394-17404} }
Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Shangquan and Ren, Wenqi and Zhou, Juxiang and Wang, Shu and Gan, Jianhou and Cao, Xiaochun}, title = {Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26114-26124} }
Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Xiao and Song, Xiufeng and Zhang, Yue and Liu, Xiaohong and Liu, Xiaoming}, title = {Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {105-116} }
TIDE: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2025_CVPR, author = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet}, title = {TIDE: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30210-30220} }
VSNet: Focusing on the Linguistic Characteristics of Sign Language-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuhao and Chen, Xinyue and Li, Hongkai and Pu, Xiaorong and Jin, Peng and Ren, Yazhou}, title = {VSNet: Focusing on the Linguistic Characteristics of Sign Language}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24320-24330} }
Active Hyperspectral Imaging Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Bohan and Liang, Jinxiu and Wang, Zhuofeng and Fan, Bin and Subpa-asa, Art and Shi, Boxin and Sato, Imari}, title = {Active Hyperspectral Imaging Using an Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {929-939} }
Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Relic_2025_CVPR, author = {Relic, Lucas and Azevedo, Roberto and Zhang, Yang and Gross, Markus and Schroers, Christopher}, title = {Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2449-2458} }
ZeroVO: Visual Odometry with Minimal Assumptions-
[pdf]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Lei and Yin, Zekai and Ohn-Bar, Eshed}, title = {ZeroVO: Visual Odometry with Minimal Assumptions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17092-17102} }
VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Yuqian and Zhang, Hang and Li, Wentong and Cheng, Zesen and Zhang, Boqiang and Li, Long and Li, Xin and Zhao, Deli and Zhang, Wenqiao and Zhuang, Yueting and Zhu, Jianke and Bing, Lidong}, title = {VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18970-18980} }
Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Taeyoung and Zhang, Dinghuai and Park, Jinkyoo and Pan, Ling}, title = {Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23625-23635} }
Multi-modal Medical Diagnosis via Large-small Model Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Wanyi and Zhao, Zihua and Yao, Jiangchao and Zhang, Ya and Bu, Jiajun and Wang, Haishuai}, title = {Multi-modal Medical Diagnosis via Large-small Model Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30763-30773} }
SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Chengzhi and Wan, Yuxin and Fu, Hao and Pfrommer, Julius and Zhong, Zeyun and Zheng, Junwei and Zhang, Jiaming and Beyerer, J{\"u}rgen}, title = {SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1342-1352} }
Image Referenced Sketch Colorization Based on Animation Creation Workflow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Dingkun and Wang, Xinrui and Li, Zhuoru and Saito, Suguru and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian}, title = {Image Referenced Sketch Colorization Based on Animation Creation Workflow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23391-23400} }
HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Chenxin and Su, Shiqian and Zhu, Xizhou and Zhang, Chenyu and Chen, Zhe and Liu, Jiawen and Wang, Wenhai and Lu, Lewei and Huang, Gao and Qiao, Yu and Dai, Jifeng}, title = {HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14559-14569} }
Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maiti_2025_CVPR, author = {Maiti, Shalini and Agapito, Lourdes and Kokkinos, Filippos}, title = {Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18552-18562} }
SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Qi and Lao, Jiangwei and Ji, Deyi and Luo, Junwei and Wu, Kang and Zhang, Yingying and Ru, Lixiang and Wang, Jian and Chen, Jingdong and Yang, Ming and Liu, Dong and Zhao, Feng}, title = {SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14733-14744} }
AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Jingyi and Yang, Jintao and Luo, Zhunchen and Cao, Yunbo and Gao, Qiang and Zhang, Mengyuan and Hu, Wenpeng}, title = {AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19758-19768} }
Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Xinyuan and Xue, Maixuan and Liu, Xinran and Pan, Zheng and Wei, Xing}, title = {Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6823-6833} }
LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanlin and Ouyang, Hao and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Chen, Qifeng and Shen, Yujun and Wang, Limin}, title = {LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12490-12500} }
GaPT-DAR: Category-level Garments Pose Tracking via Integrated 2D Deformation and 3D Reconstruction-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Li and Xu, Mingliang and Wang, Jianan and Yu, Qiaojun and Yang, Lixin and Li, Yonglu and Lu, Cewu and Wang, Rujing and Liu, Liu}, title = {GaPT-DAR: Category-level Garments Pose Tracking via Integrated 2D Deformation and 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22638-22647} }
ProbPose: A Probabilistic Approach to 2D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Purkrabek_2025_CVPR, author = {Purkrabek, Miroslav and Matas, Jiri}, title = {ProbPose: A Probabilistic Approach to 2D Human Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27124-27133} }
SapiensID: Foundation for Human Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Minchul and Ye, Dingqiang and Su, Yiyang and Liu, Feng and Liu, Xiaoming}, title = {SapiensID: Foundation for Human Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13937-13947} }
MIDI: Multi-Instance Diffusion for Single Image to 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zehuan and Guo, Yuan-Chen and An, Xingqiao and Yang, Yunhan and Li, Yangguang and Zou, Zi-Xin and Liang, Ding and Liu, Xihui and Cao, Yan-Pei and Sheng, Lu}, title = {MIDI: Multi-Instance Diffusion for Single Image to 3D Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23646-23657} }
S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yichen and Xu, Runsheng and He, Tong and Hwang, Jyh-Jing and Luo, Katie and Ji, Jingwei and Lin, Hubert and Chen, Letian and Lu, Yiren and Leng, Zhaoqi and Anguelov, Dragomir and Tan, Mingxing}, title = {S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1622-1632} }
Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jie and Qu, Xiaoye and Lu, Zhenyi and Wei, Wei and Liu, Sichen and Cheng, Yu}, title = {Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12512-12521} }
FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Hang and Ma, Xiaoxuan and Ci, Hai and Zhu, Wentao and Wang, Yizhou}, title = {FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15987-15997} }
ABC-Former: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance-
[pdf]
[supp]
[bibtex]@InProceedings{Chiu_2025_CVPR, author = {Chiu, Yu-Cheng and Chen, Guan-Rong and Chen, Zihao and Peng, Yan-Tsung}, title = {ABC-Former: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21258-21266} }
Science-T2I: Addressing Scientific Illusions in Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jialuo and Chai, Wenhao and Fu, Xingyu and Xu, Haiyang and Xie, Saining}, title = {Science-T2I: Addressing Scientific Illusions in Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2734-2744} }
Fingerprinting Denoising Diffusion Probabilistic Models-
[pdf]
[supp]
[bibtex]@InProceedings{Teng_2025_CVPR, author = {Teng, Huan and Quan, Yuhui and Wang, Chengyu and Huang, Jun and Ji, Hui}, title = {Fingerprinting Denoising Diffusion Probabilistic Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28811-28820} }
MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Xu and Tang, Yuan and Xu, Jinfeng and Li, Xianzhi}, title = {MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6584-6594} }
Re-thinking Temporal Search for Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Jinhui and Wang, Zihan and Sun, Haosen and Chandrasegaran, Keshigeyan and Durante, Zane and Eyzaguirre, Cristobal and Bisk, Yonatan and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li and Wu, Jiajun and Li, Manling}, title = {Re-thinking Temporal Search for Long-Form Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8579-8591} }
InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haijie and Wu, Yanmin and Meng, Jiarui and Gao, Qiankun and Zhang, Zhiyao and Wang, Ronggang and Zhang, Jian}, title = {InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14078-14088} }
When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rathore_2025_CVPR, author = {Rathore, Vaibhav and B, Shubhranil and Dutta, Saikat and Mehrotra, Sarthak and Kira, Zsolt and Banerjee, Biplab}, title = {When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4905-4915} }
CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Zhenhui and Chen, Guilian and Zhang, Qin and Wu, Huisi and Qin, Jing}, title = {CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15632-15641} }
BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Xuewu and Lin, Tianwei and Huang, Lichao and Xie, Hongyu and Su, Zhizhong}, title = {BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9007-9016} }
Query Efficient Black-Box Visual Prompting with Subspace Learning-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaogeng and Zhang, Haozhen and Zhang, Hualin and Li, Xingchen and Shi, Wanli and Gu, Bin and Chang, Yi}, title = {Query Efficient Black-Box Visual Prompting with Subspace Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4322-4331} }
VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Haiming and Zhou, Wending and Zhu, Yiyao and Yan, Xu and Gao, Jiantao and Bai, Dongfeng and Cai, Yingjie and Liu, Bingbing and Cui, Shuguang and Li, Zhen}, title = {VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17165-17175} }
Detecting Adversarial Data Using Perturbation Forgery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qian and Li, Chen and Luo, Yuchen and Ling, Hefei and Huang, Shijuan and Jia, Ruoxi and Yu, Ning}, title = {Detecting Adversarial Data Using Perturbation Forgery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13917-13926} }
CoA: Towards Real Image Dehazing via Compression-and-Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Long and Feng, Yuxin and Zhang, Yan and Liu, Jinyuan and Wang, Weimin and Chen, Guang-Yong and Xu, Chengpei and Su, Zhuo}, title = {CoA: Towards Real Image Dehazing via Compression-and-Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11197-11206} }
NightAdapter: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Bi_2025_CVPR, author = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Huang, Yawen and Li, Yuexiang and Wu, Xian and Zheng, Yefeng}, title = {NightAdapter: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23838-23849} }
UMFN: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition-
[pdf]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Meng and Zhang, Wenjun and Zhou, Nanrun and Chen, Shengbo and Rao, Hong}, title = {UMFN: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29299-29308} }
TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Cheng and Sui, Yang and Xiao, Jinqi and Huang, Lingyi and Gong, Yu and Li, Chendi and Yan, Jinghua and Bai, Yu and Sadayappan, Ponnuswamy and Hu, Xia and Yuan, Bo}, title = {TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19803-19813} }
Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Teng and Zhang, Jiangning and Yi, Ran and Weng, Jieyu and Wang, Yabiao and Zeng, Xianfang and Xue, Zhucun and Ma, Lizhuang}, title = {Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9351-9360} }
Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhui and Ou, Liangxun and Fu, Qiang and Amata, Hadi and Heidrich, Wolfgang and Peng, Yifan}, title = {Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15833-15842} }
Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?-
[pdf]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Renshuai and Wang, Haoyu and Guo, Yuzhe and Chen, Hairong and Zhang, Li and Liu, Xianglong and Wei, Yunchao and Zhao, Yao}, title = {Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10338-10347} }
LUCAS: Layered Universal Codec Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Di and Deng, Teng and Nam, Giljoo and Rong, Yu and Pidhorskyi, Stanislav and Li, Junxuan and Saragih, Jason and Metaxas, Dimitris N. and Cao, Chen}, title = {LUCAS: Layered Universal Codec Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21127-21137} }
MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jianwen and Lin, Gaojie and Rong, Zhengkun and Liang, Chao and Zhu, Yongming and Yang, Jiaqi and Zhong, Tianyun}, title = {MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15920-15929} }
D^3: Scaling Up Deepfake Detection by Learning from Discrepancy-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Russakovsky, Olga and Wu, Yu}, title = {{D$^3$}: Scaling Up Deepfake Detection by Learning from Discrepancy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23850-23859} }
Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR, author = {Xiang, Yongli and Hong, Ziming and Yao, Lina and Wang, Dadong and Liu, Tongliang}, title = {Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30671-30681} }
Light3R-SfM: Towards Feed-forward Structure-from-Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Elflein_2025_CVPR, author = {Elflein, Sven and Zhou, Qunjie and Leal-Taix{\'e}, Laura}, title = {Light3R-SfM: Towards Feed-forward Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16774-16784} }
Robotic Visual Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yanbang and Gong, Ziyang and Li, Haoyang and Huang, Xiaoqi and Kang, Haolan and Bai, Guangping and Ma, Xianzheng}, title = {Robotic Visual Instruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12155-12165} }
Solving Instance Detection from an Open-World Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Qianqian and Zhao, Yunhan and Kwon, Nahyun and Kim, Jeeeun and Li, Yanan and Kong, Shu}, title = {Solving Instance Detection from an Open-World Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9901-9910} }
Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Aming and Deng, Cheng}, title = {Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4682-4691} }
Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yongfan and Kwon, Hyoukjun}, title = {Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6252-6261} }
3D-GRAND: A Million-Scale Dataset for 3D-LLMs with Better Grounding and Less Hallucination-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jianing and Chen, Xuweiyi and Madaan, Nikhil and Iyengar, Madhavan and Qian, Shengyi and Fouhey, David F. and Chai, Joyce}, title = {3D-GRAND: A Million-Scale Dataset for 3D-LLMs with Better Grounding and Less Hallucination}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29501-29512} }
LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chenxu and Fu, Lvchang and Peng, Sida and Yan, Yunzhi and Zhang, Zhanhua and Chen, Yong and Xia, Jiazhi and Zhou, Xiaowei}, title = {LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1538-1548} }
Generative Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lan and Ao, Wei and Boddeti, Vishnu Naresh and Lim, Ser-Nam}, title = {Generative Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29690-29700} }
Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR, author = {Shin, Chaehun and Choi, Jooyoung and Kim, Heeseung and Yoon, Sungroh}, title = {Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7986-7996} }
MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Murai_2025_CVPR, author = {Murai, Riku and Dexheimer, Eric and Davison, Andrew J.}, title = {MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16695-16705} }
Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Xunzhi and Xu, Dan}, title = {Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {993-1002} }
Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Mi and Xue, Zihui and Dimakis, Alex and Grauman, Kristen}, title = {Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15802-15812} }
Cross-modal Information Flow in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhi and Yadav, Srishti and Han, Fengze and Shutova, Ekaterina}, title = {Cross-modal Information Flow in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19781-19791} }
Consistent and Controllable Image Animation with Motion Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Xin and Wang, Yaohui and Jia, Gengyun and Chen, Xinyuan and Wong, Tien-Tsin and Li, Yuan-Fang and Chen, Cunjian}, title = {Consistent and Controllable Image Animation with Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7288-7298} }
Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Zijing and Zhang, Fengda and Chen, Long and Kuang, Kun and Li, Jiahui and Gao, Kaifeng and Xiao, Jun and Wang, Xin and Zhu, Wenwu}, title = {Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23604-23614} }
Spatial457: A Diagnostic Benchmark for 6D Spatial Reasoning of Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xingrui and Ma, Wufei and Zhang, Tiezheng and de Melo, Celso M. and Chen, Jieneng and Yuille, Alan}, title = {Spatial457: A Diagnostic Benchmark for 6D Spatial Reasoning of Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24669-24679} }
Omnidirectional Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Kai and Shi, Hao and Wu, Sheng and Teng, Fei and Duan, Mengfei and Huang, Chang and Wang, Yuhang and Wang, Kaiwei and Yang, Kailun}, title = {Omnidirectional Multi-Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21959-21969} }
Potential Field Based Deep Metric Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhatnagar_2025_CVPR, author = {Bhatnagar, Shubhang and Ahuja, Narendra}, title = {Potential Field Based Deep Metric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25549-25559} }
Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haoxin and Li, Boyang}, title = {Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24849-24861} }
Directional Label Diffusion Model for Learning from Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_CVPR, author = {Hou, Senyu and Jiang, Gaoxia and Zhang, Jia and Yang, Shangrong and Guo, Husheng and Guo, Yaqing and Wang, Wenjian}, title = {Directional Label Diffusion Model for Learning from Noisy Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25738-25748} }
AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Wenxin and Zhang, Xu and Yao, Qingsong and Tang, Fenghe and Wu, Chenxu and Li, Yingtai and Yan, Rui and Jiang, Zihang and Zhou, S. Kevin}, title = {AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4744-4754} }
HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jingyu and Gu, Jiaqi and Fan, Lubin and Wu, Bojian and Lou, Yujing and Chen, Renjie and Liu, Ligang and Ye, Jieping}, title = {HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {788-797} }
Keyframe-Guided Creative Video Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuwei and Yang, Ceyuan and Rao, Anyi and Meng, Chenlin and Bar-Tal, Omer and Ding, Shuangrui and Agrawala, Maneesh and Lin, Dahua and Dai, Bo}, title = {Keyframe-Guided Creative Video Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13009-13020} }
Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Zheng, Tianheng and Cao, Yuanzhouhan and Qing, Linbo and Ren, Chao}, title = {Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7469-7479} }
MobileMamba: Lightweight Multi-Receptive Visual Mamba Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Haoyang and Zhang, Jiangning and Cai, Yuxuan and Chen, Hongxu and Hu, Xiaobin and Gan, Zhenye and Wang, Yabiao and Wang, Chengjie and Wu, Yunsheng and Xie, Lei}, title = {MobileMamba: Lightweight Multi-Receptive Visual Mamba Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4497-4507} }
EdgeTAM: On-Device Track Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chong and Zhu, Chenchen and Xiong, Yunyang and Suri, Saksham and Xiao, Fanyi and Wu, Lemeng and Krishnamoorthi, Raghuraman and Dai, Bo and Loy, Chen Change and Chandra, Vikas and Soran, Bilge}, title = {EdgeTAM: On-Device Track Anything Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13832-13842} }
SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tran_2025_CVPR, author = {Tran, Phi Vu}, title = {SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4672-4681} }
EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soni_2025_CVPR, author = {Soni, Sagar and Dudhane, Akshay and Debary, Hiyam and Fiaz, Mustansar and Munir, Muhammad Akhtar and Danish, Muhammad Sohail and Fraccaro, Paolo and Watson, Campbell D. and Klein, Levente J. and Khan, Fahad Shahbaz and Khan, Salman}, title = {EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14303-14313} }
Learning Endogenous Attention for Incremental Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Xiang and He, Yuhang and Li, Jingyuan and Wang, Qiang and Gong, Yihong}, title = {Learning Endogenous Attention for Incremental Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30354-30364} }
StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2025_CVPR, author = {Zhai, Shangjin and Ye, Zhichao and Liu, Jialin and Xie, Weijian and Hu, Jiaqi and Peng, Zhen and Xue, Hua and Chen, Danpeng and Wang, Xiaomeng and Yang, Lei and Wang, Nan and Liu, Haomin and Zhang, Guofeng}, title = {StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26822-26833} }
HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Liu, Yong and Hu, Jie and Li, Dengjie and Zhao, Zheng and Yang, Yujiu}, title = {HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8931-8941} }
Diffusion-based Event Generation for High-Quality Image Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xinan and Zhang, Qing and Zheng, Wei-Shi}, title = {Diffusion-based Event Generation for High-Quality Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2194-2203} }
Video Summarization with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Min Jung and Gong, Dayoung and Cho, Minsu}, title = {Video Summarization with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18981-18991} }
Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2025_CVPR, author = {Khan, Mohd Hozaifa and Sarvadevabhatla, Ravi Kiran}, title = {Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18176-18186} }
Consistency-aware Self-Training for Iterative-based Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jingyi and Ye, Peng and Zhang, Haoyu and Yuan, Jiakang and Qiang, Rao and YangChenXu, Liu and Cailin, Wu and Xu, Feng and Chen, Tao}, title = {Consistency-aware Self-Training for Iterative-based Stereo Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16641-16650} }
MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Peijie and Li, Zhong-Zhi and Yin, Fei and Ran, Dekang and Liu, Cheng-Lin}, title = {MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19541-19551} }
Balanced Rate-Distortion Optimization in Learned Image Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yichi and Duan, Zhihao and Huang, Yuning and Zhu, Fengqing}, title = {Balanced Rate-Distortion Optimization in Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2428-2438} }
Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ziyi and Liu, Yangcen}, title = {Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8711-8720} }
HomoGen: Enhanced Video Inpainting via Homography Propagation and Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Ding and Pan, Yueming and Feng, Ruoyu and Dai, Qi and Qiu, Kai and Bao, Jianmin and Luo, Chong and Chen, Zhenzhong}, title = {HomoGen: Enhanced Video Inpainting via Homography Propagation and Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22953-22962} }
Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Zhaochong and Sun, Guolei and Liu, Yun and Li, Runjia and Han, Junlin and Konukoglu, Ender and Belongie, Serge}, title = {Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16997-17007} }
Do ImageNet-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shunxin and Veldhuis, Raymond and Strisciuglio, Nicola}, title = {Do ImageNet-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25198-25207} }
HORP: Human-Object Relation Priors Guided HOI Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Pei and Yang, Jian and Zhang, Shanshan}, title = {HORP: Human-Object Relation Priors Guided HOI Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25325-25335} }
Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zeyi and Ji, Yuyang and Wang, Xiaofang and Mehta, Nikhil and Xiao, Tong and Lee, Donghyun and Vanvalkenburgh, Sigmund and Zha, Shengxin and Lai, Bolin and Yu, Licheng and Zhang, Ning and Lee, Yong Jae and Liu, Miao}, title = {Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with LLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24169-24179} }
Back