Papers
- Back
Deterministic Image-to-Image Translation via Denoising Brownian Bridge Models with Dual Approximators-
[pdf]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Bohan and Wang, Peiyong and He, Qisheng and Dong, Ming},
  title     = {Deterministic Image-to-Image Translation via Denoising {Brownian} Bridge Models with Dual Approximators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28232--28241},
}
Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ziang and Li, Zhilin and He, Yinan and Wang, Chenting and Li, Kunchang and Li, Xinhao and Zeng, Xiangyu and Wang, Zilei and Wang, Yali and Qiao, Yu and Wang, Limin and Wang, Yi},
  title     = {Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29880--29892},
}
Cross-modal Causal Relation Alignment for Video Question Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Weixing and Liu, Yang and Chen, Binglin and Su, Jiandong and Zheng, Yongsen and Lin, Liang},
  title     = {Cross-modal Causal Relation Alignment for Video Question Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24087--24096},
}
Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Ruofan and Gojcic, Zan and Ling, Huan and Munkberg, Jacob and Hasselgren, Jon and Lin, Chih-Hao and Gao, Jun and Keller, Alexander and Vijaykumar, Nandita and Fidler, Sanja and Wang, Zian},
  title     = {Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26069--26080},
}
Omni-Scene: Omni-Gaussian Representation for Ego-Centric Sparse-View Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dongxu and Li, Zhiqi and Liu, Peidong},
  title     = {{Omni-Scene}: {Omni-Gaussian} Representation for Ego-Centric Sparse-View Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22317--22327},
}
3DTopia-XL: Scaling High-quality 3D Asset Generation via Primitive Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaoxi and Tang, Jiaxiang and Dong, Yuhao and Cao, Ziang and Hong, Fangzhou and Lan, Yushi and Wang, Tengfei and Xie, Haozhe and Wu, Tong and Saito, Shunsuke and Pan, Liang and Lin, Dahua and Liu, Ziwei},
  title     = {{3DTopia-XL}: Scaling High-quality {3D} Asset Generation via Primitive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26576--26586},
}
Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Yu, Jing and Gai, Keke and Zhuang, Jiamin and Xiong, Gang and Gou, Gaopeng and Wu, Qi},
  title     = {Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24785--24795},
}
DiffSensei: Bridging Multi-Modal LLMs and Diffusion Models for Customized Manga Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianzong and Tang, Chao and Wang, Jingbo and Zeng, Yanhong and Li, Xiangtai and Tong, Yunhai},
  title     = {{DiffSensei}: Bridging Multi-Modal {LLMs} and Diffusion Models for Customized Manga Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28684--28693},
}
Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hur_2025_CVPR,
  author    = {Hur, Chan and Hong, Jeong-hun and Lee, Dong-hun and Kang, Dabin and Myeong, Semin and Park, Sang-hyo and Park, Hyeyoung},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24077--24086},
}
CARL: A Framework for Equivariant Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Greer_2025_CVPR,
  author    = {Greer, Hastings and Tian, Lin and Vialard, Fran{\c{c}}ois-Xavier and Kwitt, Roland and Estepar, Raul San Jose and Niethammer, Marc},
  title     = {{CARL}: A Framework for Equivariant Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26014--26023},
}
FlashGS: Efficient 3D Gaussian Splatting for Large-scale and High-resolution Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Guofeng and Chen, Siyan and Fu, Rong and Liao, Zimu and Wang, Yi and Liu, Tao and Hu, Boni and Xu, Linning and Pei, Zhilin and Li, Hengjie and Li, Xiuhong and Sun, Ninghui and Zhang, Xingcheng and Dai, Bo},
  title     = {{FlashGS}: Efficient {3D} {Gaussian} Splatting for Large-scale and High-resolution Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26652--26662},
}
Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ronghuan and Su, Wanchao and Liao, Jing},
  title     = {{Chat2SVG}: Vector Graphics Generation with Large Language Models and Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23690--23700},
}
Inference-Scale Complexity in ANN-SNN Conversion for High-Performance and Low-Power Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bu_2025_CVPR,
  author    = {Bu, Tong and Li, Maohua and Yu, Zhaofei},
  title     = {Inference-Scale Complexity in {ANN-SNN} Conversion for High-Performance and Low-Power Applications},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24387--24397},
}
MVDoppler-Pose: Multi-Modal Multi-View mmWave Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jaeho and Hor, Soheil and Yang, Shubo and Arbabian, Amin},
  title     = {{MVDoppler-Pose}: Multi-Modal Multi-View {mmWave} Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27750--27759},
}
TopNet: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinjie and Zhang, Yifan and Liu, Ting and Liu, Xinpu and Xu, Ke and Wan, Jianwei and Guo, Yulan and Wang, Hanyun},
  title     = {{TopNet}: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27305--27314},
}
Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhou and Feng, Mingtao and Huang, Tao and Wu, Fangfang and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25497--25507},
}
M^3-VOS: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Li, Jiaxin and Liang, Junxuan and Tan, Liming and Guo, Yejie and Lu, Cewu and Li, Yong-Lu},
  title     = {{M$^3$-VOS}: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29193--29202},
}
Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Jiayi and Zhao, Junhao and Du, Chaoqun and Wang, Yulin and Ge, Chunjiang and Ni, Zanlin and Song, Shiji and Shi, Humphrey and Huang, Gao},
  title     = {Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30503--30513},
}
Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Peihua and Zhang, Jiehua and Sheng, Xichun and Yan, Chenggang and Sun, Yaoqi and Fu, Ying and Li, Liang},
  title     = {Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30566--30576},
}
A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Duosheng and Zhou, Shihao and Pan, Jinshan and Shi, Jinglei and Qu, Lishen and Yang, Jufeng},
  title     = {A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28061--28070},
}
CocoER: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xuli and Cai, Hua and Shen, Weilin and Xu, Qing and Yu, Dingding and Ge, Weifeng and Xue, Xiangyang},
  title     = {{CocoER}: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29591--29600},
}
Enhancing Creative Generation on Stable Diffusion-based Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR,
  author    = {Han, Jiyeon and Kwon, Dahee and Lee, Gayoung and Kim, Junho and Choi, Jaesik},
  title     = {Enhancing Creative Generation on {Stable Diffusion}-based Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28609--28618},
}
Denoising Functional Maps: Diffusion Models for Shape Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuravlev_2025_CVPR,
  author    = {Zhuravlev, Aleksei and L{\"a}hner, Zorah and Golyanik, Vladislav},
  title     = {Denoising Functional Maps: Diffusion Models for Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26899--26909},
}
ProReflow: Progressive Reflow with Decomposed Velocity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Lei and Xu, Haohang and Ning, Xuefei and Li, Yu and Li, Jiajun and Li, Haoling and Lin, Yuxuan and Jiang, Dongsheng and Yang, Yujiu and Zhang, Linfeng},
  title     = {{ProReflow}: Progressive Reflow with Decomposed Velocity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28029--28038},
}
Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Jo_2025_CVPR,
  author    = {Jo, Kyungmin and Yun, Jooyeol and Choo, Jaegul},
  title     = {Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23595--23603},
}
MetaShadow: Object-Centered Shadow Detection, Removal, and Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianyu and Zhang, Jianming and Zheng, Haitian and Ding, Zhihong and Cohen, Scott and Lin, Zhe and Xiong, Wei and Fu, Chi-Wing and Figueroa, Luis and Kim, Soo Ye},
  title     = {{MetaShadow}: Object-Centered Shadow Detection, Removal, and Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28252--28262},
}
TANGO: Training-free Embodied AI Agents for Open-world Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ziliotto_2025_CVPR,
  author    = {Ziliotto, Filippo and Campari, Tommaso and Serafini, Luciano and Ballan, Lamberto},
  title     = {{TANGO}: Training-free Embodied {AI} Agents for Open-world Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24603--24613},
}
Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhaoyi and Zhang, Huan},
  title     = {Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25060--25070},
}
SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuji and Xu, Haoran and Liu, Yong and Li, Jiaze and Tang, Yansong},
  title     = {{SAM2-LOVE}: {Segment Anything Model 2} in Language-aided Audio-Visual Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28932--28941},
}
GIVEPose: Gradual Intra-class Variation Elimination for RGB-based Category-Level Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ziqin and Wang, Gu and Zhang, Chenyangguang and Zhang, Ruida and Li, Xiu and Ji, Xiangyang},
  title     = {{GIVEPose}: Gradual Intra-class Variation Elimination for {RGB}-based Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22055--22066},
}
Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sain_2025_CVPR,
  author    = {Sain, Aneeshan and Maity, Subhajit and Chowdhury, Pinaki Nath and Koley, Shubhadeep and Bhunia, Ayan Kumar and Song, Yi-Zhe},
  title     = {Sketch Down the {FLOPs}: Towards Efficient Networks for Human Sketch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28383--28393},
}
Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Wazir_2025_CVPR,
  author    = {Wazir, Saad and Kim, Daeyoung},
  title     = {Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30861--30871},
}
SynTab-LLaVA: Enhancing Multimodal Table Understanding with Decoupled Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Bangbang and Gao, Zuan and Wang, Zixiao and Zhang, Boqiang and Wang, Yuxin and Chen, Zhineng and Xie, Hongtao},
  title     = {{SynTab-LLaVA}: Enhancing Multimodal Table Understanding with Decoupled Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24796--24806},
}
Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hanhui and Zhang, Yihua and Bai, Ruizheng and Zhao, Yue and Liu, Sijia and Tu, Zhengzhong},
  title     = {Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23806--23816},
}
Improving Accuracy and Calibration via Differentiated Deep Mutual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Han and Cui, Peng and Wang, Bingning and Chen, Weipeng and Zhang, Yupeng and Zhu, Jun and Hu, Xiaolin},
  title     = {Improving Accuracy and Calibration via Differentiated Deep Mutual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25812--25821},
}
Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Ye and Zhao, Yanchao and Zhu, Chengcheng and Zhang, Jiale},
  title     = {Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25770--25779},
}
Tartan IMU: A Light Foundation Model for Inertial Positioning in Robotics-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shibo and Zhou, Sifan and Blanchard, Raphael and Qiu, Yuheng and Wang, Wenshan and Scherer, Sebastian},
  title     = {{Tartan IMU}: A Light Foundation Model for Inertial Positioning in Robotics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22520--22529},
}
Event Ellipsometer: Event-based Mueller-Matrix Video Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maeda_2025_CVPR,
  author    = {Maeda, Ryota and Moon, Yunseong and Baek, Seung-Hwan},
  title     = {Event Ellipsometer: Event-based {Mueller}-Matrix Video Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21804--21813},
}
End-to-End HOI Reconstruction Transformer with Graph-based Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenrong and Zheng, Qi and Ma, Sihan and Ye, Maosheng and Zhan, Yibing and Li, Dongjiang},
  title     = {End-to-End {HOI} Reconstruction Transformer with Graph-based Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27706--27715},
}
Disco4D: Disentangled 4D Human Generation and Animation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Hui En and Liu, Shuai and Cai, Zhongang and Yang, Lei and Zhang, Tianwei and Liu, Ziwei},
  title     = {{Disco4D}: Disentangled {4D} Human Generation and Animation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26331--26344},
}
IDOL: Instant Photorealistic 3D Human Creation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Yiyu and Lv, Jiaxi and Wen, Hao and Shuai, Qing and Zeng, Ailing and Zhu, Hao and Chen, Shifeng and Yang, Yujiu and Cao, Xun and Liu, Wei},
  title     = {{IDOL}: Instant Photorealistic {3D} Human Creation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26308--26319},
}
SketchVideo: Sketch-based Video Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Feng-Lin and Fu, Hongbo and Wang, Xintao and Ye, Weicai and Wan, Pengfei and Zhang, Di and Gao, Lin},
  title     = {{SketchVideo}: Sketch-based Video Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23379--23390},
}
Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Maochen and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24440--24451},
}
AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinghui and Sun, Qichao and Zhang, Pengze and Ye, Fulong and Liao, Zhichao and Feng, Wanquan and Zhao, Songtao and He, Qian},
  title     = {{AnyDressing}: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23723--23733},
}
Latent Space Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Souza_2025_CVPR,
  author    = {Souza, Matheus and Zheng, Yidan and Kang, Kaizhang and Mishra, Yogeshwar Nath and Fu, Qiang and Heidrich, Wolfgang},
  title     = {Latent Space Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28295--28305},
}
Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiran and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30577--30587},
}
Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast MR Image Synthesis via Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2025_CVPR,
  author    = {Shin, Yejee and Lee, Yeeun and Jang, Hanbyol and Son, Geonhui and Kim, Hyeongyu and Hwang, Dosik},
  title     = {Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast {MR} Image Synthesis via Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30918--30927},
}
SeCap: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shining and Wang, Yunlong and Wu, Ruiqi and Jiao, Bingliang and Wang, Wenxuan and Wang, Peng},
  title     = {{SeCap}: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22119--22128},
}
Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Ziying and Jia, Caiyan and Liu, Lin and Pan, Hongyu and Zhang, Yongchang and Wang, Junming and Zhang, Xingyu and Xu, Shaoqing and Yang, Lei and Luo, Yadan},
  title     = {Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22432--22441},
}
Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Chenjie and Lu, Weyl and Xu, Yifan and Chen, Yubei},
  title     = {Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27608--27617},
}
Adversarial Diffusion Compression for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Bin and Li, Gehui and Wu, Rongyuan and Zhang, Xindong and Chen, Jie and Zhang, Jian and Zhang, Lei},
  title     = {Adversarial Diffusion Compression for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28208--28220},
}
DiSciPLE: Learning Interpretable Programs for Scientific Visual Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mall_2025_CVPR,
  author    = {Mall, Utkarsh and Phoo, Cheng Perng and Chiquier, Mia and Hariharan, Bharath and Bala, Kavita and Vondrick, Carl},
  title     = {{DiSciPLE}: Learning Interpretable Programs for Scientific Visual Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29258--29267},
}
SOLAMI: Social Vision-Language-Action Modeling for Immersive Interaction with 3D Autonomous Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jianping and Xiao, Weiye and Lin, Zhengyu and Zhang, Huaizhong and Ren, Tianxiang and Gao, Yang and Lin, Zhiqian and Cai, Zhongang and Yang, Lei and Liu, Ziwei},
  title     = {{SOLAMI}: Social Vision-Language-Action Modeling for Immersive Interaction with {3D} Autonomous Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26887--26898},
}
EntropyMark: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Ming and Wang, Rui and Zhu, Zixuan and Jing, Lihua and Guo, Yuanfang},
  title     = {{EntropyMark}: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30692--30701},
}
Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Han and Xie, Yina and Tan, Guanxin and Chen, Yinghao and Hu, Rui and Wang, Ke and Zhou, Aojun and Li, Hao and Shao, Hao and Lu, Xudong and Gao, Peng and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29558--29568},
}
Towards Universal AI-Generated Image Detection by Variational Information Bottleneck Network-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haifeng and He, Qinghui and Bi, Xiuli and Li, Weisheng and Liu, Bo and Xiao, Bin},
  title     = {Towards Universal {AI}-Generated Image Detection by Variational Information Bottleneck Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23828--23837},
}
HSI: A Holistic Style Injector for Arbitrary Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shuhao and Kang, Hui and Liu, Yang and Mei, Fang and Li, Hongjuan},
  title     = {{HSI}: A Holistic Style Injector for Arbitrary Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23433--23442},
}
V2V3D: View-to-View Denoised 3D Reconstruction for Light Field Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jiayin and Fu, Zhenqi and Yu, Tao and Qiao, Hui},
  title     = {{V2V3D}: View-to-View Denoised {3D} Reconstruction for Light Field Microscopy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26451--26461},
}
Splatter-360: Generalizable 360 Gaussian Splatting for Wide-baseline Panoramic Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zheng and Wu, Chenming and Shen, Zhelun and Zhao, Chen and Ye, Weicai and Feng, Haocheng and Ding, Errui and Zhang, Song-Hai},
  title     = {{Splatter-360}: Generalizable 360 {Gaussian} Splatting for Wide-baseline Panoramic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21590--21599},
}
Towards Understanding How Knowledge Evolves in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Li, Jianing and Wang, Zizhe and Liu, Yanwei and Ji, Xiangyang},
  title     = {Towards Understanding How Knowledge Evolves in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29858--29868},
}
A Unified, Resilient, and Explainable Adversarial Patch Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, Vishesh and Agarwal, Akshay},
  title     = {A Unified, Resilient, and Explainable Adversarial Patch Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30387--30397},
}
Structured 3D Latents for Scalable and Versatile 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Jianfeng and Lv, Zelong and Xu, Sicheng and Deng, Yu and Wang, Ruicheng and Zhang, Bowen and Chen, Dong and Tong, Xin and Yang, Jiaolong},
  title     = {Structured {3D} Latents for Scalable and Versatile {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21469--21480},
}
Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Weimin and Wang, Jieke and Tang, Meng},
  title     = {Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23528--23538},
}
Adv-CPG: A Customized Portrait Generation Framework with Facial Adversarial Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Zhang, Hongyuan and Yuan, Yuan},
  title     = {{Adv-CPG}: A Customized Portrait Generation Framework with Facial Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21001--21010},
}
Fish-Vista: A Multi-Purpose Dataset for Understanding & Identification of Traits from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Mehrab_2025_CVPR,
  author    = {Mehrab, Kazi Sajeed and Maruf, M. and Daw, Arka and Neog, Abhilash and Manogaran, Harish Babu and Khurana, Mridul and Feng, Zhenyang and Altintas, Bahadir and Bakis, Yasin and Campolongo, Elizabeth G and Thompson, Matthew J and Wang, Xiaojun and Lapp, Hilmar and Berger-Wolf, Tanya and Mabee, Paula and Bart, Henry and Chao, Wei-Lun and Dahdul, Wasila M and Karpatne, Anuj},
  title     = {{Fish-Vista}: A Multi-Purpose Dataset for Understanding \& Identification of Traits from Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24275--24285},
}
PCM : Picard Consistency Model for Fast Parallel Sampling of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{So_2025_CVPR,
  author    = {So, Junhyuk and Shin, Jiwoong and Jang, Chaeyeon and Park, Eunhyeok},
  title     = {{PCM} : {Picard} Consistency Model for Fast Parallel Sampling of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23313--23322},
}
CoMapGS: Covisibility Map-based Gaussian Splatting for Sparse Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngkyoon and P{\'e}rez-Pellitero, Eduardo},
  title     = {{CoMapGS}: Covisibility Map-based {Gaussian} Splatting for Sparse Novel View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26779--26788},
}
Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yuechen and Song, Jie and Wang, Huiqiong and Song, Mingli},
  title     = {Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23817--23827},
}
Improving the Training of Data-Efficient GANs via Quality Aware Dynamic Discriminator Rejection Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhaoyu and Hua, Yang and Sun, Guanxiong and Wang, Hui and McLoone, Se{\'a}n},
  title     = {Improving the Training of Data-Efficient {GANs} via Quality Aware Dynamic Discriminator Rejection Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30682--30691},
}
MotionStone: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Shuwei and Gong, Biao and Chen, Xi and Zheng, Dandan and Tan, Shuai and Yang, Zizheng and Li, Yuyuan and He, Jingwen and Zheng, Kecheng and Chen, Jingdong and Yang, Ming and Zheng, Yinqiang},
  title     = {{MotionStone}: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22864--22874},
}
Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yankai and Zhang, Peng and Yang, Donglin and Tian, Yuan and Lin, Hai and Wang, Xiaosong},
  title     = {Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25971--25981},
}
Towards Generalizable Scene Change Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jae-Woo and Kim, Ue-Hwan},
  title     = {Towards Generalizable Scene Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24463--24473},
}
Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zheyu and Lu, Yayuan and Ma, Feipeng and Zhang, Yueyi and Yue, Huanjing and Sun, Xiaoyan},
  title     = {Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25982--25992},
}
FedAWA: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Changlong and Zhao, He and Zhang, Bingjie and Zhou, Mingyuan and Guo, Dandan and Chang, Yi},
  title     = {{FedAWA}: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30651--30660},
}
Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Zichong and Xie, Yiming and Peng, Xiaogang and Han, Zeyu and Jiang, Huaizu},
  title     = {Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27859--27871},
}
Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sehwag_2025_CVPR,
  author    = {Sehwag, Vikash and Kong, Xianghao and Li, Jingtao and Spranger, Michael and Lyu, Lingjuan},
  title     = {Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28596--28608},
}
Guiding Human-Object Interactions with Rich Geometry and Relations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Mengqing and Liu, Yifei and Guo, Ling and Huang, Shaoli and Ding, Changxing},
  title     = {Guiding Human-Object Interactions with Rich Geometry and Relations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22714--22723},
}
CADDreamer: CAD Object Generation from Single-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuan and Lin, Cheng and Liu, Yuan and Long, Xiaoxiao and Zhang, Chenxu and Wang, Ningna and Li, Xin and Wang, Wenping and Guo, Xiaohu},
  title     = {{CADDreamer}: {CAD} Object Generation from Single-view Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21448--21457},
}
Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of AI-Generated Content-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Haoyue and Sun, Yiyou and Cheng, Wei and Chen, Haifeng},
  title     = {Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of {AI-Generated} Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28821--28830},
}
DiTASK: Multi-Task Fine-Tuning with Diffeomorphic Transformations-
[pdf]
[supp]
[bibtex]@InProceedings{Mantri_2025_CVPR,
  author    = {Mantri, Krishna Sri Ipsit and Sch{\"o}nlieb, Carola-Bibiane and Ribeiro, Bruno and Baskin, Chaim and Eliasof, Moshe},
  title     = {{DiTASK}: Multi-Task Fine-Tuning with Diffeomorphic Transformations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25218--25229},
}
OW-OVD: Unified Open World and Open Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xi_2025_CVPR,
  author    = {Xi, Xing and Huang, Yangyang and Luo, Ronghua and Qiu, Yu},
  title     = {{OW-OVD}: Unified Open World and Open Vocabulary Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25454--25464},
}
Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bingliang and Chu, Wenda and Berner, Julius and Meng, Chenlin and Anandkumar, Anima and Song, Yang},
  title     = {Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20895--20905},
}
DesignDiffusion: High-Quality Text-to-Design Image Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhendong and Bao, Jianmin and Gu, Shuyang and Chen, Dong and Zhou, Wengang and Li, Houqiang},
  title     = {{DesignDiffusion}: High-Quality Text-to-Design Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20906--20915},
}
SAR3D: Autoregressive 3D Object Generation and Understanding via Multi-scale 3D VQVAE-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yongwei and Lan, Yushi and Zhou, Shangchen and Wang, Tengfei and Pan, Xingang},
  title     = {{SAR3D}: Autoregressive {3D} Object Generation and Understanding via Multi-scale {3D} {VQVAE}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28371--28382},
}
Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Ying and Peng, Jinlong and He, Qingdong and Hu, Teng and Wu, Jiafu and Chen, Hao and Wang, Haoxuan and Zhu, Wenbing and Chi, Mingmin and Liu, Jun and Wang, Yabiao},
  title     = {Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30420--30429},
}
Interactive Medical Image Analysis with Concept-based Similarity Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huy_2025_CVPR,
  author    = {Huy, Ta Duc and Tran, Sen Kim and Nguyen, Phan and Tran, Nguyen Hoang and Sam, Tran Bao and van den Hengel, Anton and Liao, Zhibin and Verjans, Johan W. and To, Minh-Son and Phan, Vu Minh Hieu},
  title     = {Interactive Medical Image Analysis with Concept-based Similarity Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30797--30806},
}
h-Edit: Effective and Flexible Diffusion-Based Editing via Doob's h-Transform-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Toan and Do, Kien and Kieu, Duc and Nguyen, Thin},
  title     = {{h-Edit}: Effective and Flexible Diffusion-Based Editing via {Doob's} h-Transform},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28490--28501},
}
Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Jianyang and Zhao, Yitian and Meng, Yanda and Zhao, He and Nguyen, Anh and Zheng, Yalin},
  title     = {Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24309--24319},
}
Spectral State Space Model for Rotation-Invariant Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dastani_2025_CVPR,
  author    = {Dastani, Sahar and Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Osowiechi, David and Hakim, Gustavo Adolfo Vargas and Beizaee, Farzad and Cheraghalikhani, Milad and Mondal, Arnab Kumar and Lombaert, Herve and Desrosiers, Christian},
  title     = {Spectral State Space Model for Rotation-Invariant Visual Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23881--23890},
}
Sharp-It: A Multi-view to Multi-view Diffusion Model for 3D Synthesis and Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Edelstein_2025_CVPR,
  author    = {Edelstein, Yiftach and Patashnik, Or and Cohen-Bar, Dana and Zelnik-Manor, Lihi},
  title     = {{Sharp-It}: A Multi-view to Multi-view Diffusion Model for {3D} Synthesis and Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21458--21468},
}
URWKV: Unified RWKV Model with Multi-state Perspective for Low-light Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Rui and Niu, Yuzhen and Li, Yuezhou and Xu, Huangbiao and Liu, Wenxi and Chen, Yuzhong},
  title     = {{URWKV}: Unified {RWKV} Model with Multi-state Perspective for Low-light Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21267--21276},
}
Functionality Understanding and Segmentation in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Corsetti_2025_CVPR,
  author    = {Corsetti, Jaime and Giuliari, Francesco and Fasoli, Alice and Boscaini, Davide and Poiesi, Fabio},
  title     = {Functionality Understanding and Segmentation in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24550--24559},
}
Dragin3D: Image Editing by Dragging in 3D Space-
[pdf]
[supp]
[bibtex]@InProceedings{Guang_2025_CVPR,
  author    = {Guang, Weiran and Gu, Xiaoguang and Huang, Mengqi and Mao, Zhendong},
  title     = {{Dragin3D}: Image Editing by Dragging in {3D} Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21502--21512},
}
Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Wenliang and Tang, Haoyu and Zheng, Qinghai and Xu, Mingzhu and Hu, Yupeng and Guan, Weili},
  title     = {Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25581--25589},
}
TSAM: Temporal SAM Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Radman_2025_CVPR,
  author    = {Radman, Abduljalil and Laaksonen, Jorma},
  title     = {{TSAM}: Temporal {SAM} Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23947--23956},
}
Invisible Backdoor Attack against Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Hanrong and Wang, Zhenting and Li, Boheng and Lin, Fulin and Han, Tingxu and Jin, Mingyu and Zhan, Chenlu and Du, Mengnan and Wang, Hongwei and Ma, Shiqing},
  title     = {Invisible Backdoor Attack against Self-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25790--25801},
}
Perceptually Accurate 3D Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chae-Yeon_2025_CVPR,
  author    = {Lee, Chae-Yeon and Oh, Hyun-Bin and Han, EunGi and Kim, Sung-Bin and Nam, Suekyeong and Oh, Tae-Hyun},
  title     = {Perceptually Accurate {3D} Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21065--21074},
}
BWFormer: Building Wireframe Reconstruction from Airborne LiDAR Point Cloud with Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Huang, Shangfeng and Gao, Xiang and Zheng, Xianwei and Shen, Shuhan},
  title     = {{BWFormer}: Building Wireframe Reconstruction from Airborne {LiDAR} Point Cloud with Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22215--22224},
}
Diffusion-4K: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinjin and Huang, Qiuyu and Liu, Junjie and Guo, Xiefan and Huang, Di},
  title     = {{Diffusion-4K}: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23464--23473},
}
OmniDrive: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shihao and Yu, Zhiding and Jiang, Xiaohui and Lan, Shiyi and Shi, Min and Chang, Nadine and Kautz, Jan and Li, Ying and Alvarez, Jose M.},
  title     = {{OmniDrive}: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22442--22452},
}
MeGA: Hybrid Mesh-Gaussian Head Avatar for High-Fidelity Rendering and Head Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Cong and Kang, Di and Sun, Heyi and Qian, Shenhan and Wang, Zixuan and Bao, Linchao and Zhang, Song-Hai},
  title     = {{MeGA}: Hybrid {Mesh-Gaussian} Head Avatar for High-Fidelity Rendering and Head Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26274--26284},
}
Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Jung-Ho and Kim, Ho-Joong and Jeon, Kyu-Sung and Lee, Seong-Whan},
  title     = {Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25166--25175},
}
Dataset Distillation with Neural Characteristic Function: A Minmax Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shaobo and Yang, Yicun and Liu, Zhiyuan and Sun, Chenghao and Hu, Xuming and He, Conghui and Zhang, Linfeng},
  title     = {Dataset Distillation with Neural Characteristic Function: A Minmax Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25570--25580},
}
Free-viewpoint Human Animation with Pose-correlated Reference Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Fa-Ting and Xu, Zhan and Liu, Haiyang and Lin, Qinjie and Song, Luchuan and Shu, Zhixin and Zhou, Yang and Ceylan, Duygu and Xu, Dan},
  title     = {Free-viewpoint Human Animation with Pose-correlated Reference Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26253--26262},
}
PillarHist: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Sifan and Yuan, Zhihang and Yang, Dawei and Hu, Xing and Qian, Jian and Zhao, Ziyu},
  title     = {{PillarHist}: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27336--27345},
}
Semantic and Expressive Variations in Image Captions Across Languages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Andre and Santy, Sebastin and Hwang, Jena D. and Zhang, Amy X. and Krishna, Ranjay},
  title     = {Semantic and Expressive Variations in Image Captions Across Languages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29667--29679},
}
ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xubing and Gan, Yukang and Ge, Yixiao and Zhang, Xiao-Ping and Tang, Yansong},
  title     = {{ATP-LLaVA}: Adaptive Token Pruning for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24972--24982},
}
ADD: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2025_CVPR,
  author    = {Mi, Ze-Yu and Yang, Yu-Bin},
  title     = {{ADD}: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23101--23110},
}
CroCoDL: Cross-device Collaborative Dataset for Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Blum_2025_CVPR,
  author    = {Blum, Hermann and Mercurio, Alessandro and O'Reilly, Joshua and Engelbracht, Tim and Dusmanu, Mihai and Pollefeys, Marc and Bauer, Zuria},
  title     = {{CroCoDL}: Cross-device Collaborative Dataset for Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27424--27434},
}
CLIP is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without OCR-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Xugong and Zhang, Peng and Yang, Jun Jie Ou and Zeng, Gangyan and Li, Yubo and Wang, Yuanyuan and Zhang, Wanqian and Dai, Pengwen},
  title     = {{CLIP} is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without {OCR}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24873--24883},
}
What Makes a Good Dataset for Knowledge Distillation?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Frank_2025_CVPR,
  author    = {Frank, Logan and Davis, Jim},
  title     = {What Makes a Good Dataset for Knowledge Distillation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23755--23764},
}
Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification-
[pdf]
[bibtex]@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Rui and Yap, Kim-Hui and Liu, Weide and Yang, Xulei and Cheng, Jun},
  title     = {Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22348--22358},
}
Shape and Texture: What Influences Reliable Optical Flow Estimation?-
[pdf]
[supp]
[bibtex]@InProceedings{Long_2025_CVPR,
  author    = {Long, Libo and Hu, Xiao and Lang, Jochen},
  title     = {Shape and Texture: What Influences Reliable Optical Flow Estimation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27894--27903},
}
Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuan and Li, Ouxiang and Mu, Tingting and Hao, Yanbin and Liu, Kuien and Wang, Xiang and He, Xiangnan},
  title     = {Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28759--28768},
}
HOIGen-1M: A Large-scale Dataset for Human-Object Interaction Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kun and Liu, Qi and Liu, Xinchen and Li, Jie and Zhang, Yongdong and Luo, Jiebo and He, Xiaodong and Liu, Wu},
  title     = {{HOIGen-1M}: A Large-scale Dataset for Human-Object Interaction Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24001--24010},
}
Order-One Rolling Shutter Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Marvin Anas and Kohn, Kathl{\'e}n and Marigliano, Orlando and Pajdla, Tomas},
  title     = {Order-One Rolling Shutter Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27007--27016},
}
Animate and Sound an Image-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xihua and Song, Ruihua and Li, Chongxuan and Cheng, Xin and Li, Boyuan and Wu, Yihan and Wang, Yuyue and Xu, Hongteng and Wang, Yunfeng},
  title     = {Animate and Sound an Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23369--23378},
}
Foveated Instance Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Hongyi and Liu, Wenxuan and Xia, Tianhua and Chen, Jinhui and Li, Ziyun and Zhang, Sai Qian},
  title     = {Foveated Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24496--24505},
}
Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kai and Li, Zekai and Cheng, Zhi-Qi and Khaki, Samir and Sajedi, Ahmad and Vedantam, Ramakrishna and Plataniotis, Konstantinos N and Hauptmann, Alexander and You, Yang},
  title     = {Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30451--30461},
}
Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt_2025_CVPR,
  author    = {Schmidt, Tanner and Newcombe, Richard},
  title     = {Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29428--29437},
}
Task-Specific Gradient Adaptation for Few-Shot One-Class Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunlong and Liu, Xiabi and Pan, Liyuan and Ren, Yuchen},
  title     = {Task-Specific Gradient Adaptation for Few-Shot One-Class Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30556--30565},
}
3D Gaussian Inpainting with Depth-Guided Cross-View Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Sheng-Yu and Chou, Zi-Ting and Wang, Yu-Chiang Frank},
  title     = {{3D} {Gaussian} Inpainting with Depth-Guided Cross-View Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26704--26713},
}
Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoffmann_2025_CVPR,
  author    = {Hoffmann, David T. and Raza, Syed Haseeb and Jiang, Hanqiu and Tananaev, Denis and Klingenhoefer, Steffen and Meinke, Martin},
  title     = {{Floxels}: Fast Unsupervised Voxel Based Scene Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22328--22337},
}
LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Joya and Zeng, Ziyun and Lin, Yiqi and Li, Wei and Ma, Zejun and Shou, Mike Zheng},
  title     = {{LiveCC}: Learning Video {LLM} with Streaming Speech Transcription at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29083--29095},
}
FlexiDiT: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anagnostidis_2025_CVPR,
  author    = {Anagnostidis, Sotiris and Bachmann, Gregor and Kim, Yeongmin and Kohler, Jonas and Georgopoulos, Markos and Sanakoyeu, Artsiom and Du, Yuming and Pumarola, Albert and Thabet, Ali and Sch{\"o}nfeld, Edgar},
  title     = {{FlexiDiT}: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28316--28326},
}
HyperGLM: HyperGraph for Video Scene Graph Generation and Anticipation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Cothren, Jackson and Yilmaz, Alper and Luu, Khoa},
  title     = {{HyperGLM}: {HyperGraph} for Video Scene Graph Generation and Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29150--29160},
}
FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Gaojian and Lin, Feng and Wu, Tong and Liu, Zhenguang and Ba, Zhongjie and Ren, Kui},
  title     = {{FSFM}: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24364--24376},
}
AlignMamba: Enhancing Multimodal Mamba with Local and Global Cross-modal Alignment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yan and Xing, Yifei and Lan, Xiangyuan and Li, Xin and Chen, Haifeng and Jiang, Dongmei},
  title     = {{AlignMamba}: Enhancing Multimodal {Mamba} with Local and Global Cross-modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24774--24784},
}
VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dahun and Piergiovanni, AJ and Mallya, Ganesh and Angelova, Anelia},
  title     = {{VideoComp}: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29060--29070},
}
One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Chunyang and Xu, Tianyang and Feng, Zhenhua and Wu, Xiaojun and Tang, Zhangyong and Li, Hui and Zhang, Zeyang and Atito, Sara and Awais, Muhammad and Kittler, Josef},
  title     = {One Model for {ALL}: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28102--28112},
}
Can Text-to-Video Generation help Video-Language Alignment?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Luca and Mancini, Massimiliano and Menapace, Willi and Tulyakov, Sergey and Wang, Yiming and Ricci, Elisa},
  title     = {Can Text-to-Video Generation help Video-Language Alignment?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24097--24107},
}
Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lilin and Wu, Chengpei and Yang, Ning},
  title     = {Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25718--25727},
}
From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Chao and Zhang, Guiwei and Ma, Changxiao and Zhang, Tianyi and Niu, Guanglin},
  title     = {From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24409--24418},
}
MIMO: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yanyuan and Xu, Dexuan and Huang, Yu and Zhan, Songkun and Wang, Hanpin and Chen, Dongxue and Wang, Xueping and Qiu, Meikang and Li, Hang},
  title     = {{MIMO}: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24732--24741},
}
Bias for Action: Video Implicit Neural Representations with Bias Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kayabasi_2025_CVPR,
  author    = {Kayabasi, Alper and Vadathya, Anil Kumar and Balakrishnan, Guha and Saragadam, Vishwanath},
  title     = {Bias for Action: Video Implicit Neural Representations with Bias Modulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27999--28008},
}
Segment Anything, Even Occluded-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tai_2025_CVPR,
  author    = {Tai, Wei-En and Shih, Yu-Lin and Sun, Cheng and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong},
  title     = {Segment Anything, Even Occluded},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29385--29394},
}
LOGICZSL: Exploring Logic-induced Representation for Compositional Zero-shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Peng and Lu, Xiankai and Hu, Hao and Xian, Yongqin and Shen, Jianbing and Wang, Wenguan},
  title     = {{LOGICZSL}: Exploring Logic-induced Representation for Compositional Zero-shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30301--30311},
}
Universal Actions for Enhanced Embodied Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Jinliang and Li, Jianxiong and Liu, Dongxiu and Zheng, Yinan and Wang, Zhihao and Ou, Zhonghong and Liu, Yu and Liu, Jingjing and Zhang, Ya-Qin and Zhan, Xianyuan},
  title     = {Universal Actions for Enhanced Embodied Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22508--22519},
}
FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junyang and Pan, Jinshan and Dong, Jiangxin},
  title     = {{FaithDiff}: Unleashing Diffusion Priors for Faithful Image Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28188--28197},
}
Scene-agnostic Pose Regression for Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Junwei and Liu, Ruiping and Chen, Yufan and Chen, Zhenfang and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer},
  title     = {Scene-agnostic Pose Regression for Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27092--27102},
}
Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gaozheng and Lyu, Shaojie and Chen, Gong and Ma, Ke and Xu, Qianqian and Sun, Yingfei and Huang, Qingming},
  title     = {Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29268--29277},
}
SEC-Prompt: SEmantic Complementary Prompting for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ye and Yang, Meng},
  title     = {{SEC-Prompt}: {SEmantic} Complementary Prompting for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25643--25656},
}
LiMoE: Mixture of LiDAR Representation Learners from Automotive Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiang and Kong, Lingdong and Shuai, Hui and Pan, Liang and Liu, Ziwei and Liu, Qingshan},
  title     = {{LiMoE}: Mixture of {LiDAR} Representation Learners from Automotive Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27368--27379},
}
PI-HMR: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ziyu and Xiong, Yufan and Niu, Mengting and Xie, Fangting and Wan, Quan and Ying, Qijun and Liu, Boyan and Cai, Xiaohui},
  title     = {{PI-HMR}: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27739--27749},
}
CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation-
[pdf]
[bibtex]@InProceedings{Long_2025_CVPR,
  author    = {Long, Yuxing and Zhang, Jiyao and Pan, Mingjie and Wu, Tianshu and Kim, Taewhan and Dong, Hao},
  title     = {{CheckManual}: A New Challenge and Benchmark for Manual-based Appliance Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22595--22604},
}
SEEN-DA: SEmantic ENtropy guided Domain-aware Attention for Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Song, Xinkai and Peng, Shaohui and Zhao, Yongwei and Zhao, Chen and Wu, Yanjun and Li, Ling},
  title     = {{SEEN-DA}: {SEmantic} {ENtropy} guided Domain-aware Attention for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25465--25475},
}
Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuyun and Zhang, Hu and Shen, Xin and Wang, Dadong and Yu, Xin},
  title     = {Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22975--22984},
}
Mind the Trojan Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junxi and Dong, Junhao and Xie, Xiaohua},
  title     = {Mind the {Trojan} Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23785--23794},
}
GEM: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hassan_2025_CVPR,
  author    = {Hassan, Mariam and Stapf, Sebastian and Rahimi, Ahmad and Rezende, Pedro M B and Haghighi, Yasaman and Br{\"u}ggemann, David and Katircioglu, Isinsu and Zhang, Lin and Chen, Xiaoran and Saha, Suman and Cannici, Marco and Aljalbout, Elie and Ye, Botao and Wang, Xi and Davtyan, Aram and Salzmann, Mathieu and Scaramuzza, Davide and Pollefeys, Marc and Favaro, Paolo and Alahi, Alexandre},
  title     = {{GEM}: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22404--22415},
}
Scene-Centric Unsupervised Panoptic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Oliver and Reich, Christoph and Araslanov, Nikita and Cremers, Daniel and Rupprecht, Christian and Roth, Stefan},
  title     = {Scene-Centric Unsupervised Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24485--24495},
}
Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia_2025_CVPR,
  author    = {Garcia, Alejandro Casta{\~n}eda and Warchocki, Jan and van Gemert, Jan and Brinks, Daan and Tomen, Nergis},
  title     = {Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27924--27933},
}
ProAPO: Progressively Automatic Prompt Optimization for Visual Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Xiangyan and Gou, Gaopeng and Zhuang, Jiamin and Yu, Jing and Song, Kun and Wang, Qihao and Li, Yili and Xiong, Gang},
  title     = {{ProAPO}: Progressively Automatic Prompt Optimization for Visual Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25145--25155},
}
Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chinchure_2025_CVPR,
  author    = {Chinchure, Aditya and Ravi, Sahithya and Ng, Raymond and Shwartz, Vered and Li, Boyang and Sigal, Leonid},
  title     = {Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24201--24210},
}
RNG: Relightable Neural Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Jiahui and Luan, Fujun and Yang, Jian and Hasan, Milos and Wang, Beibei},
  title     = {{RNG}: Relightable Neural {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26525--26534},
}
Towards Realistic Example-based Modeling via 3D Gaussian Stitching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Xinyu and Yang, Ziyi and Gong, Bingchen and Han, Xiaoguang and Yang, Sipeng and Jin, Xiaogang},
  title     = {Towards Realistic Example-based Modeling via {3D} {Gaussian} Stitching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26597--26607},
}
Generative Sparse-View Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao},
  title     = {Generative Sparse-View {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26745--26755},
}
Generative Inbetweening through Frame-wise Conditions-Driven Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Tianyi and Ren, Dongwei and Wang, Qilong and Wu, Xiaohe and Zuo, Wangmeng},
  title     = {Generative Inbetweening through Frame-wise Conditions-Driven Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27968--27978},
}
DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Yiming and Jiang, Qi and Yu, Jingyi and Ma, Yuexin},
  title     = {{DexGrasp} Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22584--22594},
}
CustAny: Customizing Anything from A Single Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Lingjie and Wu, Kai and Xu, Chengming and Hu, Xiaobin and Han, Wenhui and Peng, Jinlong and Luo, Donghao and Li, Mengtian and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei},
  title     = {{CustAny}: Customizing Anything from A Single Example},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20916--20925},
}
PoseTraj: Pose-Aware Trajectory Control in Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Longbin and Zhong, Lei and Wei, Pengfei and Li, Changjian},
  title     = {{PoseTraj}: Pose-Aware Trajectory Control in Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22776--22785},
}
VL2Lite: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks-
[pdf]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Jinseong and Ma, Chunfei and Lee, Byeongwon},
  title     = {{VL2Lite}: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30073--30083},
}
StageDesigner: Artistic Stage Generation for Scenography via Theater Scripts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gan_2025_CVPR,
  author    = {Gan, Zhaoxing and Li, Mengtian and Chen, Ruhua and Ji, Zhongxia and Guo, Sichen and Hu, Huanling and Ye, Guangnan and Hu, Zuo},
  title     = {{StageDesigner}: Artistic Stage Generation for Scenography via Theater Scripts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28705--28714},
}
Interpreting Object-level Foundation Models via Visual Precision Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ruoyu and Liang, Siyuan and Li, Jingzhi and Liu, Shiming and Li, Maosen and Huang, Zhen and Zhang, Hua and Cao, Xiaochun},
  title     = {Interpreting Object-level Foundation Models via Visual Precision Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30042--30052},
}
Foley-Flow: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows-
[pdf]
[bibtex]@InProceedings{Mo_2025_CVPR,
  author    = {Mo, Shentong and Song, Yibing},
  title     = {{Foley-Flow}: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28912--28921},
}
All-directional Disparity Estimation for Real-world QPD Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hongtao and Song, Shaohui and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming},
  title     = {All-directional Disparity Estimation for Real-world {QPD} Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21836--21846},
}
Using Diffusion Priors for Video Amodal Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kaihua and Ramanan, Deva and Khurana, Tarasha},
  title     = {Using Diffusion Priors for Video Amodal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22890--22900},
}
Dyn-HaMR: Recovering 4D Interacting Hand Motion from a Dynamic Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhengdi and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {{Dyn-HaMR}: Recovering {4D} Interacting Hand Motion from a Dynamic Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27716--27726},
}
The Scene Language: Representing Scenes with Programs, Words, and Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yunzhi and Li, Zizhang and Zhou, Matt and Wu, Shangzhe and Wu, Jiajun},
  title     = {The Scene Language: Representing Scenes with Programs, Words, and Embeddings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24625--24634},
}
Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yitang and Lin, Mingxian and Lin, Zhuo and Deng, Yipeng and Cao, Yue and Yi, Li},
  title     = {Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27673--27682},
}
EmoEdit: Evoking Emotions through Image Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Luo, Weibin and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {{EmoEdit}: Evoking Emotions through Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24690--24699},
}
SparseAlign: a Fully Sparse Framework for Cooperative Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yunshuang and Xia, Yan and Cremers, Daniel and Sester, Monika},
  title     = {{SparseAlign}: a Fully Sparse Framework for Cooperative Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22296--22305},
}
Data Distributional Properties As Inductive Bias for Systematic Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{del_Rio_2025_CVPR,
  author    = {del Rio, Felipe and Raymond-Saez, Alain and Florea, Daniel and Icarte, Rodrigo Toro and Hurtado, Julio and Calderon, Cristian B. and Soto, Alvaro},
  title     = {Data Distributional Properties As Inductive Bias for Systematic Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25590--25601},
}
TopoCellGen: Generating Histopathology Cell Topology with a Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Meilong and Gupta, Saumya and Hu, Xiaoling and Li, Chen and Abousamra, Shahira and Samaras, Dimitris and Prasanna, Prateek and Chen, Chao},
  title     = {{TopoCellGen}: Generating Histopathology Cell Topology with a Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20979--20989},
}
Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zichen and Liu, Yaoyao and Sun, Qianru},
  title     = {Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23037--23047},
}
TriTex: Learning Texture from a Single Mesh via Triplane Semantic Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cohen-Bar_2025_CVPR,
  author    = {Cohen-Bar, Dana and Cohen-Or, Daniel and Chechik, Gal and Kasten, Yoni},
  title     = {{TriTex}: Learning Texture from a Single Mesh via Triplane Semantic Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21403--21413},
}
Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Feng and Jiang, Xiaoheng and Lu, Yang and Cao, Jiale and Chen, Dong and Xu, Mingliang},
  title     = {Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23860--23869},
}
Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Yuanhao and Yin, Zhaozheng},
  title     = {Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29623--29633},
}
Language-Guided Audio-Visual Learning for Long-Term Sports Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Huangbiao and Ke, Xiao and Wu, Huanqi and Xu, Rui and Li, Yuezhou and Guo, Wenzhong},
  title     = {Language-Guided Audio-Visual Learning for Long-Term Sports Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23967--23977},
}
Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhen_2025_CVPR,
  author    = {Zhen, Dingcheng and Yin, Shunshun and Qin, Shiyang and Yi, Hou and Zhang, Ziwei and Liu, Siyuan and Qi, Gan and Tao, Ming},
  title     = {Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21075--21085},
}
PersonaHOI: Effortlessly Improving Face Personalization in Human-Object Interaction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Xinting and Wang, Haoran and Lenssen, Jan Eric and Schiele, Bernt},
  title     = {{PersonaHOI}: Effortlessly Improving Face Personalization in Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23775--23784},
}
Video-Panda: Parameter-efficient Alignment for Encoder-free Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Jinhui and Wasim, Syed Talal and Luo, Yanan and Naseer, Muzammal and Gall, Juergen},
  title     = {{Video-Panda}: Parameter-efficient Alignment for Encoder-free Video-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24119--24128},
}
MonoSplat: Generalizable 3D Gaussian Splatting from Monocular Depth Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yifan and Fan, Keyu and Yu, Weihao and Li, Chenxin and Lu, Hao and Yuan, Yixuan},
  title     = {{MonoSplat}: Generalizable {3D} {Gaussian} Splatting from Monocular Depth Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21570--21579},
}
Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ting and Li, Siyuan},
  title     = {Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29634--29643},
}
Probability Density Geodesics in Image Diffusion Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Qingtao and Singh, Jaskirat and Yang, Zhaoyuan and Tu, Peter Henry and Zhang, Jing and Li, Hongdong and Hartley, Richard and Campbell, Dylan},
  title     = {Probability Density Geodesics in Image Diffusion Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27989--27998},
}
EgoLife: Towards Egocentric Life Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingkang and Liu, Shuai and Guo, Hongming and Dong, Yuhao and Zhang, Xiamengwei and Zhang, Sicheng and Wang, Pengyun and Zhou, Zitang and Xie, Binzhu and Wang, Ziyue and Ouyang, Bei and Lin, Zhengyu and Cominelli, Marco and Cai, Zhongang and Li, Bo and Zhang, Yuanhan and Zhang, Peiyuan and Hong, Fangzhou and Widmer, Joerg and Gringoli, Francesco and Yang, Lei and Liu, Ziwei},
  title     = {{EgoLife}: Towards Egocentric Life Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28885--28900},
}
BrepGiff: Lightweight Generation of Complex B-rep with 3D GAT Diffusion-
[pdf]
[bibtex]
NOTE(review): the author "jiacheng, Hao" below has a lower-case surname and the name parts may be swapped in the upstream metadata -- confirm against the paper before citing.
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Huang, Xiaoshui and jiacheng, Hao and Bai, Yunpeng and Gan, Hongping and Shi, Yilei},
  title     = {{BrepGiff}: Lightweight Generation of Complex {B-rep} with {3D} {GAT} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26587--26596},
}
Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lintong and Yin, Kang and Lee, Seong-Whan},
  title     = {Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30053--30062},
}
Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Chaoyang and Qin, Jianyang and Cui, Jinhao and Liu, Zeyu and Hu, Ning and Liao, Qing},
  title     = {Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25124--25134},
}
3DGUT: Enabling Distorted Cameras and Secondary Rays in Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Qi and Esturo, Janick Martinez and Mirzaei, Ashkan and Mo{\"e}nne-Loccoz, Nicolas and Gojcic, Zan},
  title     = {{3DGUT}: Enabling Distorted Cameras and Secondary Rays in {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26036--26046},
}
It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data-
[pdf]
[supp]
[bibtex]@InProceedings{Schnaus_2025_CVPR,
  author    = {Schnaus, Dominik and Araslanov, Nikita and Cremers, Daniel},
  title     = {It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24983--24992},
}
Open Set Label Shift with Test Time Out-of-Distribution Reference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Changkun and Tsuchida, Russell and Petersson, Lars and Barnes, Nick},
  title     = {Open Set Label Shift with Test Time Out-of-Distribution Reference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30619--30629},
}
GaussianFormer-2: Probabilistic Gaussian Superposition for Efficient 3D Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuanhui and Thammatadatrakoon, Amonnut and Zheng, Wenzhao and Zhang, Yunpeng and Du, Dalong and Lu, Jiwen},
  title     = {{GaussianFormer-2}: Probabilistic {Gaussian} Superposition for Efficient {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27477--27486},
}
Flexible Frame Selection for Efficient Video Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Buch_2025_CVPR,
  author    = {Buch, Shyamal and Nagrani, Arsha and Arnab, Anurag and Schmid, Cordelia},
  title     = {Flexible Frame Selection for Efficient Video Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29071--29082},
}
EventGPT: Event Stream Understanding with Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoyu and Li, Jianing and Zhao, Guanghui and Zhang, Yunjian and Meng, Xin and Yu, Fei Richard and Ji, Xiangyang and Li, Ming},
  title     = {{EventGPT}: Event Stream Understanding with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29139--29149},
}
MITracker: Multi-View Integration for Visual Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Mengjie and Zhu, Yitao and Jiang, Haotian and Li, Jiaming and Shen, Zhenrong and Wang, Sheng and Huang, Haolin and Wang, Xinyu and Zhang, Han and Yang, Qing and Wang, Qian},
  title     = {{MITracker}: Multi-View Integration for Visual Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27176--27185},
}
Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berasi_2025_CVPR,
  author    = {Berasi, Davide and Farina, Matteo and Mancini, Massimiliano and Ricci, Elisa and Strisciuglio, Nicola},
  title     = {Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24917--24927},
}
Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Chuandong and Weng, Xingxing and Jiang, Shuguo and Li, Pengcheng and Yu, Lei and Xia, Gui-Song}, title = {Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27380-27389} }
Minority-Focused Text-to-Image Generation via Prompt Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Um_2025_CVPR, author = {Um, Soobin and Ye, Jong Chul}, title = {Minority-Focused Text-to-Image Generation via Prompt Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20926-20936} }
MANTA: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Lei and Fan, Dongdong and Hu, Zhiguang and Ding, Yiwen and Di, Donglin and Yi, Kai and Pagnucco, Maurice and Song, Yang}, title = {MANTA: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25518-25527} }
SCSegamba: Lightweight Structure-Aware Vision Mamba for Crack Segmentation in Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hui and Jia, Chen and Shi, Fan and Cheng, Xu and Chen, Shengyong}, title = {SCSegamba: Lightweight Structure-Aware Vision Mamba for Crack Segmentation in Structures}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29406-29416} }
Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Zu_2025_CVPR, author = {Zu, Lizheng and Lin, Lin and Fu, Song and Zhao, Na and Zhou, Pan}, title = {Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29513-29522} }
Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Ge, Mingyuan and Gao, Wei and Wang, Lei and Liu, Li}, title = {Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28726-28735} }
Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chao and Fan, Hehe and Yang, Huichen and Karimi, Sarvnaz and Yao, Lina and Yang, Yi}, title = {Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23539-23550} }
ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR, author = {Zha, Quanxing and Liu, Xin and Peng, Shu-Juan and Cheung, Yiu-ming and Xu, Xing and Wang, Nannan}, title = {ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29680-29689} }
Preconditioners for the Stochastic Training of Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chng_2025_CVPR, author = {Chng, Shin-Fang and Saratchandran, Hemanth and Lucey, Simon}, title = {Preconditioners for the Stochastic Training of Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27222-27232} }
ECBench: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR, author = {Dang, Ronghao and Yuan, Yuqian and Zhang, Wenqi and Xin, Yifei and Zhang, Boqiang and Li, Long and Wang, Liuyi and Zeng, Qinyang and Li, Xin and Bing, Lidong}, title = {ECBench: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24593-24602} }
SfM-Free 3D Gaussian Splatting via Hierarchical Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Bo and Yao, Angela}, title = {SfM-Free 3D Gaussian Splatting via Hierarchical Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21654-21663} }
CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Weitao and Zhou, Hang and Liao, Jing and Cheng, Li and Zhou, Wenbo}, title = {CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29173-29182} }
MINIMA: Modality Invariant Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Jiangwei and Jiang, Xingyu and Li, Zizhuo and Liang, Dingkang and Zhou, Xin and Bai, Xiang}, title = {MINIMA: Modality Invariant Image Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23059-23068} }
3D Convex Splatting: Radiance Field Rendering with 3D Smooth Convexes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Held_2025_CVPR, author = {Held, Jan and Vandeghen, Renaud and Hamdi, Abdullah and Deliege, Adrien and Cioppa, Anthony and Giancola, Silvio and Vedaldi, Andrea and Ghanem, Bernard and Van Droogenbroeck, Marc}, title = {3D Convex Splatting: Radiance Field Rendering with 3D Smooth Convexes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21360-21369} }
3D Prior Is All You Need: Cross-Task Few-shot 2D Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Yihua and Wang, Hengfei and Zhang, Zhongqun and Yue, Yang and Kim, Boeun and Lu, Feng and Chang, Hyung Jin}, title = {3D Prior Is All You Need: Cross-Task Few-shot 2D Gaze Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23891-23900} }
SeriesBench: A Benchmark for Narrative-Driven Drama Series Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenkai and Lei, Yiming and Liu, Zeming and Leng, Haitao and Liu, ShaoGuo and Gao, Tingting and Liu, Qingjie and Wang, Yunhong}, title = {SeriesBench: A Benchmark for Narrative-Driven Drama Series Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28995-29004} }
Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Quan and Fang, Jinwei and Yuan, Rui and Tang, Xi and Qi, Yuxin and Zhang, Ke and Yuan, Chun}, title = {Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24139-24148} }
GliaNet: Adaptive Neural Network Structure Learning with Glia-Driven-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Mengqiao and Pan, Liyuan and Liu, Xiabi}, title = {GliaNet: Adaptive Neural Network Structure Learning with Glia-Driven}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25240-25249} }
EntitySAM: Segment Everything in Video-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Mingqiao and Oh, Seoung Wug and Ke, Lei and Lee, Joon-Young}, title = {EntitySAM: Segment Everything in Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24234-24243} }
GS-2DGS: Geometrically Supervised 2DGS for Reflective Object Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Tong_2025_CVPR, author = {Tong, Jinguang and Li, Xuesong and Maken, Fahira Afzal and Muthu, Sundaram and Petersson, Lars and Nguyen, Chuong and Li, Hongdong}, title = {GS-2DGS: Geometrically Supervised 2DGS for Reflective Object Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21547-21557} }
Video Depth Anything: Consistent Depth Estimation for Super-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Sili and Guo, Hengkai and Zhu, Shengnan and Zhang, Feihu and Huang, Zilong and Feng, Jiashi and Kang, Bingyi}, title = {Video Depth Anything: Consistent Depth Estimation for Super-Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22831-22840} }
InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Tiehan and Nan, Kepan and Xie, Rui and Zhou, Penghao and Yang, Zhenheng and Fu, Chaoyou and Li, Xiang and Yang, Jian and Tai, Ying}, title = {InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28974-28983} }
Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Ziteng and Chu, Xuangeng and Harada, Tatsuya}, title = {Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26472-26482} }
EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yura_2025_CVPR, author = {Yura, Toshiya and Mirzaei, Ashkan and Gilitschenski, Igor}, title = {EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26876-26886} }
3D Student Splatting and Scooping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jialin and Yue, Jiangbei and He, Feixiang and Wang, He}, title = {3D Student Splatting and Scooping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21045-21054} }
World-consistent Video Diffusion with Explicit 3D Modeling-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qihang and Zhai, Shuangfei and Martin, Miguel {\'A}ngel Bautista and Miao, Kevin and Toshev, Alexander and Susskind, Joshua and Gu, Jiatao}, title = {World-consistent Video Diffusion with Explicit 3D Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21685-21695} }
Learning Partonomic 3D Reconstruction from Image Collections-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2025_CVPR, author = {Ruan, Xiaoqian and Yu, Pei and Jia, Dian and Park, Hyeonjeong and Xiong, Peixi and Tang, Wei}, title = {Learning Partonomic 3D Reconstruction from Image Collections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26734-26744} }
ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tong and Wang, Mingkang and Wang, Zhongze and Wang, Hongkai and Xu, Qi and Cong, Fengyu and Xu, Hongming}, title = {ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25920-25929} }
EVOS: Efficient Implicit Neural Training via EVOlutionary Selector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Weixiang and Xie, Shuzhao and Ren, Chengwei and Xie, Siyi and Tang, Chen and Ge, Shijia and Wang, Mingzi and Wang, Zhi}, title = {EVOS: Efficient Implicit Neural Training via EVOlutionary Selector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30472-30482} }
MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zeqi and Akkaya, Ibrahim Batuhan and Waeijen, Luc and Bondarev, Egor and Pourtaherian, Arash and Moreira, Orlando}, title = {MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29309-29320} }
Probabilistic Prompt Distribution Learning for Animal Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Rao_2025_CVPR, author = {Rao, Jiyong and Zhao, Brian Nlong and Wang, Yu}, title = {Probabilistic Prompt Distribution Learning for Animal Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29438-29447} }
Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Wenbin and Tian, Feng and Leng, Sicong and Nie, Jiahao and Lin, Haonan and Wang, Qianying and Chen, Ping and Zhang, Xiaoqin and Lu, Shijian}, title = {Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29915-29926} }
UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Chen and Ma, Xinzhu and Su, Encheng and Song, Xiufeng and Liu, Xiaohong and Li, Wei-Hong and Bai, Lei and Ouyang, Wanli and Yue, Xiangyu}, title = {UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29213-29224} }
Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Xiangjun and Li, Xiaoyu and Zhuang, Yiyu and Zhang, Qi and Hu, Wenbo and Zhang, Chaopeng and Yao, Yao and Shan, Ying and Quan, Long}, title = {Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21392-21402} }
BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xuanpu and Song, Dan and Zhan, Pengxin and Chang, Tianyu and Zeng, Jianhao and Chen, Qingguo and Luo, Weihua and Liu, An-An}, title = {BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26399-26408} }
Supervising Sound Localization by In-the-wild Egomotion-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2025_CVPR, author = {Min, Anna and Chen, Ziyang and Zhao, Hang and Owens, Andrew}, title = {Supervising Sound Localization by In-the-wild Egomotion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23936-23946} }
AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yuheng and Yang, Shijie and Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan}, title = {AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23131-23140} }
AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Lingteng and Zhu, Shenhao and Zuo, Qi and Gu, Xiaodong and Dong, Yuan and Zhang, Junfei and Xu, Chao and Li, Zhe and Yuan, Weihao and Bo, Liefeng and Chen, Guanying and Dong, Zilong}, title = {AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21148-21158} }
IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuan and Bai, Ziqian and Tan, Feitong and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda}, title = {IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21107-21116} }
DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR, author = {Kumar, Ashish and Rajagopalan, A. N.}, title = {DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21728-21738} }
UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yichong and Cai, Yichi and Zhang, Shangzhan and Zhou, Hongyu and Hu, Haoji and Yu, Huimin and Geiger, Andreas and Liao, Yiyi}, title = {UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27519-27530} }
Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Xu, Jianqing and Zhang, Jingyun and Wang, ShaoMing and Zhao, Yang and Ding, Shouhong and Jia, Wei and Wu, Yunsheng}, title = {Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26367-26376} }
Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Jiaxin and Xiang, Mingyue and Sun, Hao and Huang, Yixuan and Weng, Zhi}, title = {Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24560-24569} }
MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yukang and Fung, Hokit and Xu, Jianjin and Ren, Zeping and Lau, Adela S. M. and Yin, Guosheng and Li, Xiu}, title = {MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26242-26252} }
Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shiyu and Wang, Zhenting and Juefei-Xu, Felix and Xia, Xide and Liu, Miao and Wang, Xiaofang and Liang, Mingfu and Zhang, Ning and Metaxas, Dimitris N. and Yu, Licheng}, title = {Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29869-29879} }
Matrix-Free Shared Intrinsics Bundle Adjustment-
[pdf]
[bibtex]@InProceedings{Safari_2025_CVPR, author = {Safari, Daniel}, title = {Matrix-Free Shared Intrinsics Bundle Adjustment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27017-27026} }
Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xiaolu and Yang, Ruizi and Wang, Song and Li, Wentong and Chen, Junbo and Zhu, Jianke}, title = {Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22359-22368} }
Color Alignment in Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shum_2025_CVPR, author = {Shum, Ka Chun and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit}, title = {Color Alignment in Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28446-28455} }
LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reilly_2025_CVPR, author = {Reilly, Dominick and Chakraborty, Rajatsubhra and Sinha, Arkaprava and Govind, Manish Kumar and Wang, Pu and Bremond, Francois and Xue, Le and Das, Srijan}, title = {LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24297-24308} }
Language-Guided Salient Object Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Fang and Liu, Yuhao and Xu, Ke and Ye, Shuquan and Hancke, Gerhard Petrus and Lau, Rynson W. H.}, title = {Language-Guided Salient Object Ranking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29803-29813} }
Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Yue-Hua and Huang, Tai-Ming and Hua, Kai-Lung and Chen, Jun-Cheng}, title = {Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22995-23005} }
SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shijia and Xia, Qiming and Guo, Xusheng and Zou, Pufan and Zheng, Maoji and Wu, Hai and Wen, Chenglu and Wang, Cheng}, title = {SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29374-29384} }
VoCo-LLaMA: Towards Vision Compression with Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Xubing and Gan, Yukang and Huang, Xiaoke and Ge, Yixiao and Tang, Yansong}, title = {VoCo-LLaMA: Towards Vision Compression with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29836-29846} }
Focal Split: Untethered Snapshot Depth from Differential Defocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Junjie and Mamish, John and Fu, Alan and Concannon, Thomas and Hester, Josiah and Alexander, Emma and Guo, Qi}, title = {Focal Split: Untethered Snapshot Depth from Differential Defocus}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26965-26974} }
PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Zekai and Hu, Yufan and Fan, Bin and Liu, Hongmin}, title = {PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22089-22098} }
Towards All-in-One Medical Image Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Yuan and Ji, Kaiyuan and Zhang, Rongzhao and Jiang, Yankai and Li, Chunyi and Wang, Xiaosong and Zhai, Guangtao}, title = {Towards All-in-One Medical Image Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30774-30786} }
Integral Fast Fourier Color Constancy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Wenjun and Qian, Yanlin and Chen, Huaian and Dai, Junkang and Jin, Yi}, title = {Integral Fast Fourier Color Constancy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26420-26429} }
ResCLIP: Residual Attention for Training-free Dense Vision-language Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuhang and Deng, Jinhong and Li, Wen and Duan, Lixin}, title = {ResCLIP: Residual Attention for Training-free Dense Vision-language Inference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29968-29978} }
Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Rui and Ding, Shuangrui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi}, title = {Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24045-24055} }
Bayesian Test-Time Adaptation for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Lihua and Ye, Mao and Li, Shuaifeng and Li, Nianxin and Zhu, Xiatian and Deng, Lei and Liu, Hongbin and Lei, Zhen}, title = {Bayesian Test-Time Adaptation for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29999-30009} }
Causal Composition Diffusion Model for Closed-loop Traffic Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Haohong and Huang, Xin and Phan, Tung and Hayden, David and Zhang, Huan and Zhao, Ding and Srinivasa, Siddhartha and Wolff, Eric and Chen, Hongge}, title = {Causal Composition Diffusion Model for Closed-loop Traffic Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27542-27552} }
Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Duowang and Huang, Xiaohu and Huang, Haiyan and Zhou, Hao and Shao, Zhenfeng}, title = {Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24011-24022} }
Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianyang and Luo, Qianli and Yang, Guowu and Yang, Wenjing and Liu, Weide and Lin, Guosheng and Lv, Fengmao}, title = {Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30291-30300} }
Customized Condition Controllable Generation for Video Soundtrack-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Fan and Ma, Kunsheng and Xu, Changsheng}, title = {Customized Condition Controllable Generation for Video Soundtrack}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23914-23924} }
ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuanwei and Wei, Hui and Jia, Chengyu and Xiao, Ruqi and Ruan, Weijian and Wei, Xingxing and Zhou, Joey Tianyi and Wang, Zheng}, title = {ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21248-21257} }
WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Yu and Wang, Jun and Guan, Nan and Xue, Chun Jason}, title = {WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29342-29351} }
Gromov-Wasserstein Problem with Cyclic Symmetry-
[pdf]
[supp]
[bibtex]@InProceedings{Takeda_2025_CVPR, author = {Takeda, Shoichiro and Akagi, Yasunori}, title = {Gromov-Wasserstein Problem with Cyclic Symmetry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21011-21020} }
SimAvatar: Simulation-Ready Avatars with Layered Hair and Clothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xueting and Yuan, Ye and De Mello, Shalini and Daviet, Gilles and Leaf, Jonathan and Macklin, Miles and Kautz, Jan and Iqbal, Umar}, title = {{SimAvatar}: Simulation-Ready Avatars with Layered Hair and Clothing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26320--26330} }
Test-Time Backdoor Detection for Object Detection Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hangtao and Wang, Yichen and Yan, Shihui and Zhu, Chenyu and Zhou, Ziqi and Hou, Linshan and Hu, Shengshan and Li, Minghui and Zhang, Yanjun and Zhang, Leo Yu}, title = {Test-Time Backdoor Detection for Object Detection Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24377--24386} }
SDBF: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of DNN Models-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Xiaofan and Li, Shixin and Ma, Xiaojing and Zhu, Bin Benjamin and Zhang, Dongmei and Yu, Linchen}, title = {{SDBF}: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of {DNN} Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29278--29287} }
Distilling Multi-modal Large Language Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hegde_2025_CVPR, author = {Hegde, Deepti and Yasarla, Rajeev and Cai, Hong and Han, Shizhong and Bhattacharyya, Apratim and Mahajan, Shweta and Liu, Litian and Garrepalli, Risheek and Patel, Vishal M. and Porikli, Fatih}, title = {Distilling Multi-modal Large Language Models for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27575--27585} }
HD-EPIC: A Highly-Detailed Egocentric Video Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Perrett_2025_CVPR, author = {Perrett, Toby and Darkhalil, Ahmad and Sinha, Saptarshi and Emara, Omar and Pollard, Sam and Parida, Kranti Kumar and Liu, Kaiting and Gatti, Prajwal and Bansal, Siddhant and Flanagan, Kevin and Chalk, Jacob and Zhu, Zhifan and Guerrier, Rhodri and Abdelazim, Fahd and Zhu, Bin and Moltisanti, Davide and Wray, Michael and Doughty, Hazel and Damen, Dima}, title = {{HD-EPIC}: A Highly-Detailed Egocentric Video Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23901--23913} }
Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haicheng and Ju, Chen and Lin, Weixiong and Xiao, Shuai and Chen, Mengting and Huang, Yixuan and Liu, Chang and Yao, Mingshuai and Lan, Jinsong and Chen, Ying and Liu, Qingwen and Wang, Yanfeng}, title = {Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29791--29802} }
H-MoRe: Learning Human-centric Motion Representation for Action Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhanbo and Liu, Xiaoming and Kong, Yu}, title = {{H-MoRe}: Learning Human-centric Motion Representation for Action Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22702--22713} }
Hierarchical Compact Clustering Attention (COCA) for Unsupervised Object-Centric Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kucuksozen_2025_CVPR, author = {Kucuksozen, Can and Yemez, Yucel}, title = {Hierarchical Compact Clustering Attention ({COCA}) for Unsupervised Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25388--25398} }
Effortless Active Labeling for Long-Term Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Guowei and Ding, Changxing}, title = {Effortless Active Labeling for Long-Term Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25633--25642} }
Leveraging Temporal Cues for Semi-Supervised Multi-View 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jinhyung and Sanghvi, Navyata and Adachi, Hiroki and Shibata, Yoshihisa and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris}, title = {Leveraging Temporal Cues for Semi-Supervised Multi-View {3D} Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27401--27412} }
Logits DeConfusion with CLIP for Few-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shuo and Liu, Fang and Hao, Zehua and Wang, Xinyi and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping}, title = {Logits {DeConfusion} with {CLIP} for Few-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25411--25421} }
Pay Attention to the Foreground in Object-Centric Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Pinzhuo and Yang, Shengjie and Yu, Hang and Kot, Alex}, title = {Pay Attention to the Foreground in Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30281--30290} }
FluidNexus: 3D Fluid Reconstruction and Prediction from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Yue and Yu, Hong-Xing and Zhu, Bo and Wu, Jiajun}, title = {{FluidNexus}: {3D} Fluid Reconstruction and Prediction from a Single Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26091--26101} }
DeformCL: Learning Deformable Centerline Representation for Vessel Extraction in 3D Medical Image-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Ziwei and Zhang, Zhixing and Liu, Yuhang and Zhang, Zhao and Yu, Haojun and Wang, Dong and Wang, Liwei}, title = {{DeformCL}: Learning Deformable Centerline Representation for Vessel Extraction in {3D} Medical Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30896--30905} }
OCRT: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Luyao and Yuan, Yuxuan and Chen, Chaoqi and Zhang, Zeyu and Huang, Yue and Zhang, Kun}, title = {{OCRT}: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25422--25433} }
SPARS3R: Semantic Prior Alignment and Regularization for Sparse 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yutao and Guo, Yuxiang and Li, Deming and Peng, Cheng}, title = {{SPARS3R}: Semantic Prior Alignment and Regularization for Sparse {3D} Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26810--26821} }
VidBot: Learning Generalizable 3D Actions from In-the-Wild 2D Human Videos for Zero-Shot Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hanzhi and Sun, Boyang and Zhang, Anran and Pollefeys, Marc and Leutenegger, Stefan}, title = {{VidBot}: Learning Generalizable {3D} Actions from In-the-Wild {2D} Human Videos for Zero-Shot Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27661--27672} }
Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majumder_2025_CVPR, author = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Pradhan, Reina and Grauman, Kristen}, title = {Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29016--29028} }
Adaptive Keyframe Sampling for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Xi and Qiu, Jihao and Xie, Lingxi and Tian, Yunjie and Jiao, Jianbin and Ye, Qixiang}, title = {Adaptive Keyframe Sampling for Long Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29118--29128} }
Person De-reidentification: A Variation-guided Identity Shift Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Yi-Xing and Tang, Yu-Ming and Lin, Kun-Yu and Yang, Qize and Meng, Jingke and Wei, Xihan and Zheng, Wei-Shi}, title = {Person De-reidentification: A Variation-guided Identity Shift Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29331--29341} }
DiGIT: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Ho-Joong and Lee, Yearang and Hong, Jung-Ho and Lee, Seong-Whan}, title = {{DiGIT}: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24286--24296} }
Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jiuhai and Yang, Jianwei and Wu, Haiping and Li, Dianqi and Gao, Jianfeng and Zhou, Tianyi and Xiao, Bin}, title = {{Florence-VL}: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24928--24938} }
Realistic Test-Time Adaptation of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2025_CVPR, author = {Zanella, Maxime and Fuchs, Cl{\'e}ment and De Vleeschouwer, Christophe and Ben Ayed, Ismail}, title = {Realistic Test-Time Adaptation of Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25103--25112} }
SelfSplat: Pose-Free and 3D Prior-Free Generalizable 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Gyeongjin and Yoo, Jisang and Park, Jihyeon and Nam, Seungtae and Im, Hyeonsoo and Shin, Sangheon and Kim, Sangpil and Park, Eunbyung}, title = {{SelfSplat}: Pose-Free and {3D} Prior-Free Generalizable {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22012--22022} }
Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Nannan and Shih, Kevin J. and Plummer, Bryan A.}, title = {Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21238--21247} }
Exploring Simple Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Zihang}, title = {Exploring Simple Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30221--30230} }
MP-GUI: Modality Perception with MLLMs for GUI Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ziwei and Chen, Weizhi and Yang, Leyang and Zhou, Sheng and Zhao, Shengchu and Zhan, Hanbei and Jin, Jiongchao and Li, Liangcheng and Shao, Zirui and Bu, Jiajun}, title = {{MP-GUI}: Modality Perception with {MLLMs} for {GUI} Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29711--29721} }
Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Yuchen and Zhao, Zhengyu and Lin, Chenhao and Yang, Bo and Zhou, Lu and Liu, Zhe and Shen, Chao}, title = {Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25071--25080} }
Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR, author = {Pei, Gensheng and Chen, Tao and Wang, Yujia and Cai, Xinhao and Shu, Xiangbo and Zhou, Tianfei and Yao, Yazhou}, title = {Seeing What Matters: Empowering {CLIP} with Patch Generation-to-Selection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24862--24872} }
Erasing Undesirable Influence in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jing and Le, Trung and Hayat, Munawar and Harandi, Mehrtash}, title = {Erasing Undesirable Influence in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28263--28273} }
Closest Neighbors are Harmful for Lightweight Masked Auto-encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Jian and Hasssan, Ahmed and Yang, Li and Fan, Deliang and Shin, Jinwoo and Seo, Jae-sun}, title = {Closest Neighbors are Harmful for Lightweight Masked Auto-encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25230--25239} }
Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Qianli and Ning, Xuefei and Liu, Dongrui and Niu, Li and Zhang, Linfeng}, title = {Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23281--23291} }
HELVIPAD: A Real-World Dataset for Omnidirectional Stereo Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zayene_2025_CVPR, author = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi{\`e}re, Charles and Cherkaoui, Salim and Kontouli, Alexandre and Alahi, Alexandre}, title = {{HELVIPAD}: A Real-World Dataset for Omnidirectional Stereo Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26975--26984} }
Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yikai and Cao, Chenjie and Yu, Junqiu and Fan, Ke and Xue, Xiangyang and Fu, Yanwei}, title = {Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23237--23248} }
Practical Solutions to the Relative Pose of Three Calibrated Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tzamos_2025_CVPR, author = {Tzamos, Charalambos and Kocur, Viktor and Ding, Yaqing and Barath, Daniel and Haladova, Zuzana Berger and Sattler, Torsten and Kukelova, Zuzana}, title = {Practical Solutions to the Relative Pose of Three Calibrated Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21913--21923} }
PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Schmalfuss_2025_CVPR, author = {Schmalfuss, Jenny and Chang, Nadine and VS, Vibashan and Shen, Maying and Bruhn, Andres and Alvarez, Jose M.}, title = {{PARC}: A Quantitative Framework Uncovering the Symmetries within Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25081--25091} }
RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2025_CVPR, author = {Mu, Yao and Chen, Tianxing and Chen, Zanxin and Peng, Shijia and Lan, Zhiqian and Gao, Zeyu and Liang, Zhixuan and Yu, Qiaojun and Zou, Yude and Xu, Mingkun and Lin, Lunkai and Xie, Zhiqiang and Ding, Mingyu and Luo, Ping}, title = {{RoboTwin}: Dual-Arm Robot Benchmark with Generative Digital Twins}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27649--27660} }
AnimateAnything: Consistent and Controllable Animation for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Guojun and Wang, Chi and Zhang, Rong and Wang, Yikai and Li, Hong and Xu, Weiwei}, title = {{AnimateAnything}: Consistent and Controllable Animation for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27946--27956} }
PRaDA: Projective Radial Distortion Averaging-
[pdf]
[supp]
[bibtex]@InProceedings{Sinitsyn_2025_CVPR, author = {Sinitsyn, Daniil and H{\"a}renstam-Nielsen, Linus and Cremers, Daniel}, title = {{PRaDA}: Projective Radial Distortion Averaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21902--21912} }
GenAssets: Generating in-the-wild 3D Assets in Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ze and Wang, Jingkang and Zhang, Haowei and Manivasagam, Sivabalan and Chen, Yun and Urtasun, Raquel}, title = {{GenAssets}: Generating in-the-wild {3D} Assets in Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22392--22403} }
Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ta_2025_CVPR, author = {Ta, Huu Binh and Nguyen, Duc and Tran, Quyen and Tran, Toan and Pham, Tung}, title = {Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24341--24350} }
FiRe: Fixed-points of Restoration Priors for Solving Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Terris_2025_CVPR, author = {Terris, Matthieu and Kamilov, Ulugbek S. and Moreau, Thomas}, title = {{FiRe}: Fixed-points of Restoration Priors for Solving Inverse Problems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23185--23194} }
CoMBO: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Kai and Zhang, Anqi and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao}, title = {{CoMBO}: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25667--25676} }
Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junjie and Chen, Weilong and Zuo, Yifan and Fang, Yuming}, title = {Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22035--22044} }
Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shvetsova_2025_CVPR, author = {Shvetsova, Nina and Nagrani, Arsha and Schiele, Bernt and Kuehne, Hilde and Rupprecht, Christian}, title = {Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29050--29059} }
Embodied Scene Understanding for Vision Language Models via MetaVQA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Weizhen and Duan, Chenda and Peng, Zhenghao and Liu, Yuxin and Zhou, Bolei}, title = {Embodied Scene Understanding for Vision Language Models via {MetaVQA}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22453--22464} }
Learning Temporally Consistent Video Depth from Video Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Jiahao and Yang, Yuanbo and Zhou, Hongyu and Zhang, Youmin and Shen, Yujun and Guizilini, Vitor and Wang, Yue and Poggi, Matteo and Liao, Yiyi}, title = {Learning Temporally Consistent Video Depth from Video Diffusion Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22841--22852} }
Samba: A Unified Mamba-based Framework for General Salient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Jiahao and Fu, Keren and Liu, Xiaohong and Zhao, Qijun}, title = {{Samba}: A Unified {Mamba}-based Framework for General Salient Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25314--25324} }
LesionLocator: Zero-Shot Universal Tumor Segmentation and Tracking in 3D Whole-Body Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rokuss_2025_CVPR, author = {Rokuss, Maximilian and Kirchhoff, Yannick and Akbal, Seval and Kovacs, Balint and Roy, Saikat and Ulrich, Constantin and Wald, Tassilo and Rotkopf, Lukas T. and Schlemmer, Heinz-Peter and Maier-Hein, Klaus}, title = {{LesionLocator}: Zero-Shot Universal Tumor Segmentation and Tracking in {3D} Whole-Body Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30872--30885} }
DOF-GS: Adjustable Depth-of-Field 3D Gaussian Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yujie and Chakravarthula, Praneeth and Chen, Baoquan}, title = {{DOF-GS}: Adjustable Depth-of-Field {3D} {Gaussian} Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21297--21306} }
The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Daiqing and Zhao, Handong and Shi, Jing and Jenni, Simon and Fan, Yifei and Dernoncourt, Franck and Cohen, Scott and Li, Sheng}, title = {The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24807--24816} }
Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shuling and Hong, Fa-Ting and Huang, Xiaoshui and Xu, Dan}, title = {Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26232--26241} }
GraphI2P: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Bie_2025_CVPR, author = {Bie, Lin and Pan, Shouan and Li, Siqi and Zhao, Yining and Gao, Yue}, title = {{GraphI2P}: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22161--22171} }
SoftVQ-VAE: Efficient 1-Dimensional Continuous Tokenizer-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hao and Wang, Ze and Li, Xiang and Sun, Ximeng and Chen, Fangyi and Liu, Jiang and Wang, Jindong and Raj, Bhiksha and Liu, Zicheng and Barsoum, Emad}, title = {{SoftVQ-VAE}: Efficient 1-Dimensional Continuous Tokenizer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28358--28370} }
DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haoyang and Wang, Liang and Wang, Chao and Jiang, Jing and Peng, Yan and Long, Guodong}, title = {{DPC}: Dual-Prompt Collaboration for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25623--25632} }
AIM-Fair: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zengqun and Liu, Ziquan and Cao, Yu and Gong, Shaogang and Patras, Ioannis}, title = {{AIM-Fair}: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28748--28758} }
Robust Multi-Object 4D Generation for In-the-wild Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Wen-Hsuan and Ke, Lei and Liu, Jianmeng and Huo, Mingxiao and Tokmakov, Pavel and Fragkiadaki, Katerina}, title = {Robust Multi-Object {4D} Generation for In-the-wild Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22067--22077} }
Mono-InternVL: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Gen and Yang, Xue and Dou, Wenhan and Wang, Zhaokai and Liu, Jiawen and Dai, Jifeng and Qiao, Yu and Zhu, Xizhou}, title = {{Mono-InternVL}: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24960--24971} }
FLAVC: Learned Video Compression with Feature Level Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chun and Sun, Heming and Katto, Jiro}, title = {{FLAVC}: Learned Video Compression with Feature Level Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28019--28028} }
An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Wentao and Wang, Jing and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang}, title = {An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27325--27335} }
PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Guangshun and Feng, Yuan and Ma, Long and Wang, Chen and Zhou, Yuanfeng and Li, Changjian}, title = {{PCDreamer}: Point Cloud Completion Through Multi-view Diffusion Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27243--27253} }
Your ViT is Secretly an Image Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kerssies_2025_CVPR, author = {Kerssies, Tommie and Cavagnero, Niccol{\`o} and Hermans, Alexander and Norouzi, Narges and Averta, Giuseppe and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan}, title = {Your {ViT} is Secretly an Image Segmentation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25303--25313} }
Cross-Rejective Open-Set SAR Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Shasha and Lu, Shiming and Du, Zhaolong and Jiao, Licheng and Gou, Shuiping and Mou, Luntian and Lu, Xuequan and Xiong, Lin and Zhang, Yimeng}, title = {Cross-Rejective Open-Set {SAR} Image Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23027--23036} }
SplineGS: Robust Motion-Adaptive Spline for Real-Time Dynamic 3D Gaussians from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jongmin and Bui, Minh-Quan Viet and Bello, Juan Luis Gonzalez and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl}, title = {{SplineGS}: Robust Motion-Adaptive Spline for Real-Time Dynamic {3D} {Gaussians} from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26866--26875} }
Multi-modal Knowledge Distillation-based Human Trajectory Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Jaewoo and Lee, Seohee and Park, Daehee and Lee, Giwon and Yoon, Kuk-Jin}, title = {Multi-modal Knowledge Distillation-based Human Trajectory Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24222--24233} }
ShiftwiseConv: Small Convolutional Kernel with Large Kernel Effect-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Dachong and Li, Li and Chen, Zhuangzhuang and Li, Jianqiang}, title = {{ShiftwiseConv}: Small Convolutional Kernel with Large Kernel Effect}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25281--25291} }
Object-Shot Enhanced Grounding Network for Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Yisen and Zhang, Haoyu and Liu, Meng and Guan, Weili and Nie, Liqiang}, title = {Object-Shot Enhanced Grounding Network for Egocentric Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24190-24200} }
Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Hoonhee and Kang, Jae-Young and Kim, Youngho and Yoon, Kuk-Jin}, title = {Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27197-27210} }
Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2025_CVPR, author = {Ahn, Namhyuk and Yoo, KiYoon and Ahn, Wonhyuk and Kim, Daesik and Nam, Seung-Hun}, title = {Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28801-28810} }
The Devil is in Temporal Token: High Quality Video Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yang, Zongxin and Zhang, Pingping and Lu, Huchuan}, title = {The Devil is in Temporal Token: High Quality Video Reasoning Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29183-29192} }
LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Han and Dong, Wei and Chen, Jun}, title = {LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21580-21589} }
T-FAKE: Synthesizing Thermal Images for Facial Landmarking-
[pdf]
[supp]
[bibtex]@InProceedings{Flotho_2025_CVPR, author = {Flotho, Philipp and Piening, Moritz and Kukleva, Anna and Steidl, Gabriele}, title = {T-FAKE: Synthesizing Thermal Images for Facial Landmarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26356-26366} }
Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Albastaki_2025_CVPR, author = {Albastaki, Shahad and Sohail, Anabia and Ganapathi, Iyyakutti Iyappan and Alawode, Basit and Khan, Asim and Javed, Sajid and Werghi, Naoufel and Bennamoun, Mohammed and Mahmood, Arif}, title = {Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25907-25919} }
PICD: Versatile Perceptual Image Compression with Diffusion Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Tongda and Li, Jiahao and Li, Bin and Wang, Yan and Zhang, Ya-Qin and Lu, Yan}, title = {PICD: Versatile Perceptual Image Compression with Diffusion Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28436-28445} }
VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gonzalez_2025_CVPR, author = {Gonzalez, Juan Luis and Yao, Xu and Whelan, Alex and Olszewski, Kyle and Kim, Hyeongwoo and Garrido, Pablo}, title = {VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22901-22910} }
Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Jie and Chen, Kangrui and Cui, Yingqian and Zeng, Shenglai and Liu, Hui and Xing, Yue and Tang, Jiliang and Lyu, Lingjuan}, title = {Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28769-28778} }
Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2025_CVPR, author = {M{\"u}ller, Andreas and Lukovnikov, Denis and Thietke, Jonas and Fischer, Asja and Quiring, Erwin}, title = {Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20937-20946} }
VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qian and Eldesokey, Abdelrahman and Mendiratta, Mohit and Zhan, Fangneng and Kortylewski, Adam and Theobalt, Christian and Wonka, Peter}, title = {VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22985-22994} }
PersonaBooth: Personalized Text-to-Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Boeun and Jeong, Hea In and Sung, JungHoon and Cheng, Yihua and Lee, Jeongmin and Chang, Ju Yong and Choi, Sang-Il and Choi, Younggeun and Shin, Saim and Kim, Jungho and Chang, Hyung Jin}, title = {PersonaBooth: Personalized Text-to-Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22756-22765} }
Star with Bilinear Mapping-
[pdf]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zelin and Huang, Yu and Xu, Zhengqin and Tang, Feilong and Hu, Ming and Yang, Xiaokang and Shen, Wei}, title = {Star with Bilinear Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25292-25302} }
DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jay Zhangjie and Zhang, Yuxuan and Turki, Haithem and Ren, Xuanchi and Gao, Jun and Shou, Mike Zheng and Fidler, Sanja and Gojcic, Zan and Ling, Huan}, title = {DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26024-26035} }
Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Runfeng and Okunev, Mikhail and Guo, Zixuan and Duong, Anh Ha and Richardt, Christian and O'Toole, Matthew and Tompkin, James}, title = {Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21021-21030} }
Align3R: Aligned Monocular Depth Estimation for Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Jiahao and Huang, Tianyu and Li, Peng and Dou, Zhiyang and Lin, Cheng and Cui, Zhiming and Dong, Zhen and Yeung, Sai-Kit and Wang, Wenping and Liu, Yuan}, title = {Align3R: Aligned Monocular Depth Estimation for Dynamic Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22820-22830} }
Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition-
[pdf]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Wuyou and Jia, Guoli and Zhao, Sicheng and Yang, Jufeng}, title = {Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29601-29611} }
Anomize: Better Open Vocabulary Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Fei and Liu, Wenxuan and Chen, Jingjing and Zhang, Ruixu and Wang, Yuran and Zhong, Xian and Wang, Zheng}, title = {Anomize: Better Open Vocabulary Video Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29203-29212} }
Efficient Diffusion as Low Light Enhancer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lan_2025_CVPR, author = {Lan, Guanzhou and Ma, Qianli and Yang, Yuqi and Wang, Zhigang and Wang, Dong and Li, Xuelong and Zhao, Bin}, title = {Efficient Diffusion as Low Light Enhancer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21277-21286} }
HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pilligua_2025_CVPR, author = {Pilligua, Maria and Xue, Danna and Vazquez-Corral, Javier}, title = {HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22933-22942} }
Instant Adversarial Purification with Adversarial Consistency Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Chun Tong and Yam, Hon Ming and Guo, Zhongliang and Qian, Yifei and Lau, Chun Pong}, title = {Instant Adversarial Purification with Adversarial Consistency Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24331-24340} }
Feature Selection for Latent Factor Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kansabanik_2025_CVPR, author = {Kansabanik, Rittwika and Barbu, Adrian}, title = {Feature Selection for Latent Factor Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30742-30751} }
Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Yoonjeon and Ryu, Soohyun and Jung, Yeonsung and Lee, Hyunkoo and Kim, Joowon and Yang, June Yong and Hwang, Jaeryong and Yang, Eunho}, title = {Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23474-23483} }
Decoupling Training-Free Guided Diffusion by ADMM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Youyuan and Liu, Zehua and Li, Zenan and Li, Zhaoyu and Clark, James J. and Si, Xujie}, title = {Decoupling Training-Free Guided Diffusion by ADMM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23292-23302} }
SwiftEdit: Lightning Fast Text-Guided Image Editing via One-Step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Trong-Tung and Nguyen, Quang and Nguyen, Khoi and Tran, Anh and Pham, Cuong}, title = {SwiftEdit: Lightning Fast Text-Guided Image Editing via One-Step Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21492-21501} }
Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Keqi and Srivastav, Vinkle and Mutter, Didier and Padoy, Nicolas}, title = {Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24419-24428} }
CLIP-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaokun and Huang, Yaping and Guan, Qingji}, title = {CLIP-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30312-30321} }
A Simple Data Augmentation for Feature Distribution Skewed Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Yunlu and Fu, Huazhu and Li, Yuexiang and Xie, Jinheng and Ma, Jun and Yang, Guang and Zhu, Lei}, title = {A Simple Data Augmentation for Feature Distribution Skewed Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25749-25758} }
GLane3D: Detecting Lanes with Graph of 3D Keypoints-
[pdf]
[supp]
[bibtex]@InProceedings{Ozturk_2025_CVPR, author = {{\"O}zt{\"u}rk, Halil {\.I}brahim and Kalfao{\u{g}}lu, Muhammet Esat and Kilinc, Ozsel}, title = {GLane3D: Detecting Lanes with Graph of 3D Keypoints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27508-27518} }
Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Ningyuan and Fu, Minghao and Wu, Jianxin}, title = {Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25208-25217} }
Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Li-Jun and Chen, Zhen-Duo and Wang, Yongxin and Luo, Xin and Xu, Xin-Shun}, title = {Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25657-25666} }
4DTAM: Non-Rigid Tracking and Mapping via Dynamic Surface Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuki_2025_CVPR, author = {Matsuki, Hidenobu and Bae, Gwangbin and Davison, Andrew J.}, title = {4DTAM: Non-Rigid Tracking and Mapping via Dynamic Surface Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26921-26932} }
Unseen Visual Anomaly Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Han and Cao, Yunkang and Dong, Hao and Fink, Olga}, title = {Unseen Visual Anomaly Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25508-25517} }
T2ICount: Enhancing Cross-modal Understanding for Zero-Shot Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Yifei and Guo, Zhongliang and Deng, Bowen and Lei, Chun Tong and Zhao, Shuai and Lau, Chun Pong and Hong, Xiaopeng and Pound, Michael P.}, title = {T2ICount: Enhancing Cross-modal Understanding for Zero-Shot Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25336-25345} }
ReNeg: Learning Negative Embedding with Reward Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaomin and Liu, Yixuan and Isobe, Takashi and Jia, Xu and Cui, Qinpeng and Zhou, Dong and Li, Dong and He, You and Lu, Huchuan and Wang, Zhongdao and Barsoum, Emad}, title = {ReNeg: Learning Negative Embedding with Reward Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23636-23645} }
MotionPro: A Precise Motion Controller for Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhongwei and Long, Fuchen and Qiu, Zhaofan and Pan, Yingwei and Liu, Wu and Yao, Ting and Mei, Tao}, title = {MotionPro: A Precise Motion Controller for Image-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27957-27967} }
Goku: Flow Based Video Generative Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Shoufa and Ge, Chongjian and Zhang, Yuqi and Zhang, Yida and Zhu, Fengda and Yang, Hao and Hao, Hongxiang and Wu, Hui and Lai, Zhichao and Hu, Yifei and Lin, Ting-Che and Zhang, Shilong and Li, Fu and Li, Chuan and Wang, Xing and Peng, Yanghua and Sun, Peize and Luo, Ping and Jiang, Yi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing}, title = {Goku: Flow Based Video Generative Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23516-23527} }
WISH: Weakly Supervised Instance Segmentation using Heterogeneous Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Kweon_2025_CVPR, author = {Kweon, Hyeokjun and Yoon, Kuk-Jin}, title = {WISH: Weakly Supervised Instance Segmentation using Heterogeneous Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25377-25387} }
Good, Cheap, and Fast: Overfitted Image Compression with Wasserstein Distortion-
[pdf]
[supp]
[bibtex]@InProceedings{Balle_2025_CVPR, author = {Ball{\'e}, Jona and Versari, Luca and Dupont, Emilien and Kim, Hyunjik and Bauer, Matthias}, title = {Good, Cheap, and Fast: Overfitted Image Compression with Wasserstein Distortion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23259-23268} }
Period-LLM: Extending the Periodic Capability of Multimodal Large Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuting and Lu, Hao and Hu, Qingyong and Wang, Yin and Yuan, Kaishen and Liu, Xin and Wu, Kaishun}, title = {Period-LLM: Extending the Periodic Capability of Multimodal Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29237-29247} }
V2X-R: Cooperative LiDAR-4D Radar Fusion with Denoising Diffusion for 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Xun and Wang, Jinlong and Xia, Qiming and Chen, Siheng and Yang, Bisheng and Li, Xin and Wang, Cheng and Wen, Chenglu}, title = {V2X-R: Cooperative LiDAR-4D Radar Fusion with Denoising Diffusion for 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27390-27400} }
TAROT: Towards Essentially Domain-Invariant Robustness with Theoretical Justification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Dongyoon and Lee, Jihu and Kim, Yongdai}, title = {TAROT: Towards Essentially Domain-Invariant Robustness with Theoretical Justification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25780-25789} }
Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Jiangyong and Jia, Baoxiong and Wang, Yan and Zhu, Ziyu and Linghu, Xiongkun and Li, Qing and Zhu, Song-Chun and Huang, Siyuan}, title = {Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24570-24581} }
APT: Adaptive Personalized Training for Diffusion Models with Limited Data-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2025_CVPR, author = {Chae, JungWoo and Kim, Jiyoon and Choi, JaeWoong and Kim, Kyungyul and Hwang, Sangheum}, title = {APT: Adaptive Personalized Training for Diffusion Models with Limited Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28619-28628} }
SCAP: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyu and Xu, Kunlun and Liu, Zichen and Peng, Yuxin and Zhou, Jiahuan}, title = {SCAP: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30032-30041} }
Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Zihang and Vedaldi, Andrea}, title = {Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22809-22819} }
DocVLM: Make Your VLM an Efficient Reader-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nacson_2025_CVPR, author = {Nacson, Mor Shpigel and Aberdam, Aviad and Ganz, Roy and Ben Avraham, Elad and Golts, Alona and Kittenplon, Yair and Mazor, Shai and Litman, Ron}, title = {DocVLM: Make Your VLM an Efficient Reader}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29005-29015} }
Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Ronghang and Hu, Mengxuan and Zhuang, Weiming and Lyu, Lingjuan and Yu, Xiang and Li, Sheng}, title = {Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25688-25697} }
Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chengxiang and Wei, Yake and Yang, Zequn and Hu, Di}, title = {Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25854-25863} }
FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shangzhan and Wang, Jianyuan and Xu, Yinghao and Xue, Nan and Rupprecht, Christian and Zhou, Xiaowei and Shen, Yujun and Wetzstein, Gordon}, title = {FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21936-21947} }
Improving Gaussian Splatting with Localized Points Management-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Haosen and Zhang, Chenhao and Wang, Wenqing and Volino, Marco and Hilton, Adrian and Zhang, Li and Zhu, Xiatian}, title = {Improving Gaussian Splatting with Localized Points Management}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21696-21705} }
One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Senmao and Wang, Lei and Wang, Kai and Liu, Tao and Xie, Jiehang and van de Weijer, Joost and Khan, Fahad Shahbaz and Yang, Shiqi and Wang, Yaxing and Yang, Jian}, title = {One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23563-23574} }
Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Wenxin and Tang, Song and Liu, Xiaofeng and Yi, Xiaojing and Ye, Mao and Zu, Chunxiao and Li, Jiahao and Zhu, Xiatian}, title = {Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28337-28346} }
LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Jian and Huang, Wenke and Wan, Guancheng and Yang, Qu and Ye, Mang}, title = {LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26170-26180} }
SEAL: Semantic Attention Learning for Long Video Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lan and Chen, Yujia and Tran, Du and Boddeti, Vishnu Naresh and Chu, Wen-Sheng}, title = {SEAL: Semantic Attention Learning for Long Video Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26192-26201} }
SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qingyuan and Song, Rui and Li, Jiaojiao and Cheng, Kerui and Ferstl, David and Hu, Yinlin}, title = {SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22045-22054} }
FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2025_CVPR, author = {Bandyopadhyay, Hmrishav and Song, Yi-Zhe}, title = {FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28394-28404} }
SketchAgent: Language-Driven Sequential Sketch Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vinker_2025_CVPR, author = {Vinker, Yael and Shaham, Tamar Rott and Zheng, Kristine and Zhao, Alex and Fan, Judith E. and Torralba, Antonio}, title = {SketchAgent: Language-Driven Sequential Sketch Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23355-23368} }
DRAWER: Digital Reconstruction and Articulation With Environment Realism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Hongchi and Su, Entong and Memmel, Marius and Jain, Arhan and Yu, Raymond and Mbiziwo-Tiapo, Numfor and Farhadi, Ali and Gupta, Abhishek and Wang, Shenlong and Ma, Wei-Chiu}, title = {DRAWER: Digital Reconstruction and Articulation With Environment Realism}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21771-21782} }
GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, You and Fang, Li and Zhu, Hao and Hu, Fei and Ye, Long and Ma, Zhan}, title = {GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21349-21359} }
Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yante and Qi, Hanwen and Chen, Haoyu and Liang, Xinlian and Zhao, Guoying}, title = {Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27346-27356} }
ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Ji Woo and Ton, Tri and Pham, Trung X. and Koo, Gwanhyeong and Yoon, Sunjae and Yoo, Chang D.}, title = {ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28284-28294} }
MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Kriz_2025_CVPR, author = {Kriz, Reno and Sanders, Kate and Etter, David and Murray, Kenton and Carpenter, Cameron and Recknor, Hannah and Guallar-Blasco, Jimena and Martin, Alexander and Yang, Eugene and Van Durme, Benjamin}, title = {MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24149-24158} }
VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Dabing and Gao, Zheng}, title = {VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28091-28101} }
BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Yuyang and Xiao, Shishi and Wu, Keming and Liao, Qisheng and Chen, Bohan and Lin, Kevin and Huang, Danqing and Li, Ji and Yuan, Yuhui}, title = {BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23615-23624} }
SmartCLIP: Modular Vision-language Alignment with Identification Guarantees-
[pdf]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Shaoan and Kong, Lingjing and Zheng, Yujia and Yao, Yu and Tang, Zeyu and Xing, Eric P. and Chen, Guangyi and Zhang, Kun}, title = {SmartCLIP: Modular Vision-language Alignment with Identification Guarantees}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29780-29790} }
Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Lei and Meng, Yuan and Tang, Chen and Ma, Xinzhu and Jiang, Jingyan and Wang, Xin and Wang, Zhi and Zhu, Wenwu}, title = {Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28306-28315} }
RoboGround: Robotic Manipulation with Grounded Vision-Language Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Haifeng and Chen, Xinyi and Chen, Yilun and Li, Hao and Han, Xiaoshen and Wang, Zehan and Wang, Tai and Pang, Jiangmiao and Zhao, Zhou}, title = {RoboGround: Robotic Manipulation with Grounded Vision-Language Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22540-22550} }
Improving Transferable Targeted Attacks with Feature Tuning Mixup-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Kaisheng and Dai, Xuelong and Li, Yanjie and Wang, Dong and Xiao, Bin}, title = {Improving Transferable Targeted Attacks with Feature Tuning Mixup}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25802-25811} }
DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Tianyi and Wu, Dongming and Han, Wencheng and Jiang, Junpeng and Zhou, Xia and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing}, title = {DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27531-27541} }
HuPerFlow: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yung-Hao and Sun, Zitang and Fukiage, Taiki and Nishida, Shin'ya}, title = {HuPerFlow: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22799-22808} }
MetaWriter: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Wenhao and Gu, Li and Suen, Ching Yee and Wang, Yang}, title = {MetaWriter: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23494-23504} }
Subnet-Aware Dynamic Supernet Training for Neural Architecture Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2025_CVPR, author = {Jeon, Jeimin and Oh, Youngmin and Lee, Junghyup and Baek, Donghyeon and Kim, Dohyung and Eom, Chanho and Ham, Bumsub}, title = {Subnet-Aware Dynamic Supernet Training for Neural Architecture Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30137-30146} }
EchoWorld: Learning Motion-Aware World Models for Echocardiography Probe Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Yang and Wang, Yulin and Jiang, Haojun and Liu, Pan and Song, Shiji and Huang, Gao}, title = {EchoWorld: Learning Motion-Aware World Models for Echocardiography Probe Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25993-26003} }
Controllable Human Image Generation with Personalized Multi-Garments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Yisol and Kwak, Sangkyung and Yu, Sihyun and Choi, Hyungwon and Shin, Jinwoo}, title = {Controllable Human Image Generation with Personalized Multi-Garments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28736-28747} }
UHD-processer: Unified UHD Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yidi and Li, Dong and Fu, Xueyang and Lu, Xin and Huang, Jie and Zha, Zheng-Jun}, title = {UHD-processer: Unified UHD Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23121-23130} }
GBC-Splat: Generalizable Gaussian-Based Clothed Human Digitalization under Sparse RGB Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Hanzhang and Liao, Zhanfeng and Zhou, Boyao and Zheng, Shunyuan and Zhou, Xilong and Zhang, Liuxin and Wang, QianYing and Liu, Yebin}, title = {GBC-Splat: Generalizable Gaussian-Based Clothed Human Digitalization under Sparse RGB Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26377-26387} }
AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahmani_2025_CVPR, author = {Bahmani, Sherwin and Skorokhodov, Ivan and Qian, Guocheng and Siarohin, Aliaksandr and Menapace, Willi and Tagliasacchi, Andrea and Lindell, David B. and Tulyakov, Sergey}, title = {AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22875-22889} }
A Unified Model for Compressed Sensing MRI Across Undersampling Patterns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jatyani_2025_CVPR, author = {Jatyani, Armeet Singh and Wang, Jiayun and Chandrashekar, Aditi and Wu, Zihui and Liu-Schiaffini, Miguel and Tolooshams, Bahareh and Anandkumar, Anima}, title = {A Unified Model for Compressed Sensing MRI Across Undersampling Patterns}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26004-26013} }
TSD-SR: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Linwei and Fan, Qingnan and Guo, Yihong and Wang, Zhonghao and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing}, title = {TSD-SR: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23174-23184} }
Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jianing and Sax, Alexander and Liang, Kevin J. and Henaff, Mikael and Tang, Hao and Cao, Ang and Chai, Joyce and Meier, Franziska and Feiszli, Matt}, title = {Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21924-21935} }
StyleStudio: Text-Driven Style Transfer with Selective Control of Style Elements-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Mingkun and Song, Xue and Zhu, Beier and Wang, Hao and Zhang, Chi}, title = {StyleStudio: Text-Driven Style Transfer with Selective Control of Style Elements}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23443-23452} }
CTRL-O: Language-Controllable Object-Centric Visual Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Didolkar_2025_CVPR, author = {Didolkar, Aniket and Zadaianchuk, Andrii and Awal, Rabiul and Seitzer, Maximilian and Gavves, Efstratios and Agrawal, Aishwarya}, title = {CTRL-O: Language-Controllable Object-Centric Visual Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29523-29533} }
Text Augmented Correlation Transformer For Few-shot Classification & Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Nandam_2025_CVPR, author = {Nandam, Srinivasa Rao and Atito, Sara and Feng, Zhenhua and Kittler, Josef and Awais, Muhammad}, title = {Text Augmented Correlation Transformer For Few-shot Classification \& Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25357-25366} }
Unified Dense Prediction of Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Lehan and Qi, Lu and Li, Xiangtai and Li, Sheng and Jampani, Varun and Yang, Ming-Hsuan}, title = {Unified Dense Prediction of Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28963-28973} }
Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yong and Zheng, Weijie and Huang, Hanxun and Ye, Guangnan and Ma, Xingjun}, title = {Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30702-30711} }
Temporal Action Detection Model Compression by Progressive Block Drop-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xiaoyong and Guo, Yong and Liang, Jiaming and Zhuang, Sitong and Zeng, Runhao and Hu, Xiping}, title = {Temporal Action Detection Model Compression by Progressive Block Drop}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29225-29236} }
Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Dong and Zhong, Wenqi and Yu, Wei and Pan, Yingwei and Zhang, Dingwen and Yao, Ting and Han, Junwei and Mei, Tao}, title = {Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22648-22657} }
DINOv2 Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Jose_2025_CVPR, author = {Jose, Cijo and Moutakanni, Th\'eo and Kang, Dahyun and Baldassarre, Federico and Darcet, Timoth\'ee and Xu, Hu and Li, Daniel and Szafraniec, Marc and Ramamonjisoa, Micha\"el and Oquab, Maxime and Sim\'eoni, Oriane and Vo, Huy V. and Labatut, Patrick and Bojanowski, Piotr}, title = {DINOv2 Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24905-24916} }
Learning Affine Correspondences by Integrating Geometric Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Pengju and Guan, Banglei and Yu, Zhenbao and Shang, Yang and Yu, Qifeng and Barath, Daniel}, title = {Learning Affine Correspondences by Integrating Geometric Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27038-27048} }
UCOD-DPL: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Weiqi and Chen, Lvhai and Kou, Huaijia and Zhang, Shengchuan and Zhang, Yan and Cao, Liujuan}, title = {UCOD-DPL: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30365-30375} }
Geometry in Style: 3D Stylization via Surface Normal Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2025_CVPR, author = {Dinh, Nam Anh and Lang, Itai and Kim, Hyunwoo and Stein, Oded and Hanocka, Rana}, title = {Geometry in Style: 3D Stylization via Surface Normal Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28456-28467} }
PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Chenyu and Dong, Xuan and Zhu, Xizhou and Su, Weijie and Wang, Jiahao and Tian, Hao and Chen, Zhe and Wang, Wenhai and Lu, Lewei and Dai, Jifeng}, title = {PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24939-24949} }
Multiple Object Tracking as ID Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Ruopeng and Qi, Ji and Wang, Limin}, title = {Multiple Object Tracking as ID Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27883-27893} }
PIDLoc: Cross-View Pose Optimization Network Inspired by PID Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Wooju and Park, Juhye and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, DongWan and Myung, Hyun}, title = {PIDLoc: Cross-View Pose Optimization Network Inspired by PID Controllers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21981-21990} }
DreamOmni: Unified Image Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Bin and Zhang, Yuechen and Li, Jingyao and Wang, Chengyao and Wang, Yitong and Wu, Xinglong and Yu, Bei and Jia, Jiaya}, title = {DreamOmni: Unified Image Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28533-28543} }
Hash3D: Training-free Acceleration for 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xingyi and Liu, Songhua and Wang, Xinchao}, title = {Hash3D: Training-free Acceleration for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21481-21491} }
Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ruiyi and Zheng, Yushuo and Zhang, Zicheng and Li, Chunyi and Liu, Shuaicheng and Zhai, Guangtao and Liu, Xiaohong}, title = {Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23091-23100} }
RUBIK: A Structured Benchmark for Image Matching across Geometric Challenges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Loiseau_2025_CVPR, author = {Loiseau, Thibaut and Bourmaud, Guillaume}, title = {RUBIK: A Structured Benchmark for Image Matching across Geometric Challenges}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27070-27080} }
Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Jiuyang and Jiang, Junjun and Jiang, Kui and Li, Jiahan and Zhang, Yongbing}, title = {Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30818-30828} }
IncEventGS: Pose-Free Gaussian Splatting from a Single Event Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Jian and Dong, Chengrui and Chen, Xuanhua and Liu, Peidong}, title = {IncEventGS: Pose-Free Gaussian Splatting from a Single Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26933-26942} }
OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gutbrod_2025_CVPR, author = {Gutbrod, Max and Rauber, David and Nunes, Danilo Weber and Palm, Christoph}, title = {OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25874-25886} }
FactCheXcker: Mitigating Measurement Hallucinations in Chest X-ray Report Generation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Heiman_2025_CVPR, author = {Heiman, Alice and Zhang, Xiaoman and Chen, Emma and Kim, Sung Eun and Rajpurkar, Pranav}, title = {FactCheXcker: Mitigating Measurement Hallucinations in Chest X-ray Report Generation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30787-30796} }
When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Huang, Qingming}, title = {When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24033-24044} }
UniPose: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yiheng and Hou, Ruibing and Chang, Hong and Shan, Shiguang and Chen, Xilin}, title = {UniPose: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27805-27815} }
POMP: Physics-consistent Motion Generative Model through Phase Manifolds-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Bin and Pan, Ye and Liu, Zhimeng and Tan, Shuai and Jin, Xiaogang and Yang, Xiaokang}, title = {POMP: Physics-consistent Motion Generative Model through Phase Manifolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22690-22701} }
Reasoning to Attend: Try to Understand How <SEG> Token Works-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Rui and Yin, Xin and Dou, Dejing}, title = {Reasoning to Attend: Try to Understand How \ensuremath{<}SEG\ensuremath{>} Token Works}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24722-24731} }
ReSpec: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Chris Dongjoo and Moon, Jihwan and Moon, Sangwoo and Yun, Heeseung and Lee, Sihaeng and Kembhavi, Aniruddha and Lee, Soonyoung and Kim, Gunhee and Lee, Sangho and Clark, Christopher}, title = {ReSpec: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29040-29049} }
DynRefer: Delving into Region-level Multimodal Tasks via Dynamic Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yuzhong and Liu, Feng and Liu, Yue and Liao, Mingxiang and Gong, Chen and Ye, Qixiang and Wan, Fang}, title = {DynRefer: Delving into Region-level Multimodal Tasks via Dynamic Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24742-24752} }
Playing the Fool: Jailbreaking LLMs and Multimodal LLMs with Out-of-Distribution Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Joonhyun and Bae, Seyun and Jung, Yeonsung and Hwang, Jaeryong and Yang, Eunho}, title = {Playing the Fool: Jailbreaking LLMs and Multimodal LLMs with Out-of-Distribution Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29937-29946} }
VideoWorld: Exploring Knowledge Learning from Unlabeled Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Zhongwei and Wei, Yunchao and Guo, Xun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie}, title = {VideoWorld: Exploring Knowledge Learning from Unlabeled Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29029-29039} }
3D-SLNR: A Super Lightweight Neural Representation for Large-scale 3D Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Chenhui and Tang, Fulin and An, Ning and Wu, Yihong}, title = {3D-SLNR: A Super Lightweight Neural Representation for Large-scale 3D Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27233-27242} }
STINR: Deciphering Spatial Transcriptomics via Implicit Neural Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Yisi and Zhao, Xile and Ye, Kai and Meng, Deyu}, title = {STINR: Deciphering Spatial Transcriptomics via Implicit Neural Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25930-25939} }
RADIOv2.5: Improved Baselines for Agglomerative Vision Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Heinrich_2025_CVPR, author = {Heinrich, Greg and Ranzinger, Mike and Yin, Hongxu and Lu, Yao and Kautz, Jan and Tao, Andrew and Catanzaro, Bryan and Molchanov, Pavlo}, title = {RADIOv2.5: Improved Baselines for Agglomerative Vision Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22487-22497} }
Unsupervised Discovery of Facial Landmarks and Head Pose-
[pdf]
[supp]
[bibtex]@InProceedings{Tourani_2025_CVPR, author = {Tourani, Satyajit and Tourani, Siddharth and Mahmood, Arif and Khan, Muhammad Haris}, title = {Unsupervised Discovery of Facial Landmarks and Head Pose}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21192-21202} }
Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Sherry X. and Sra, Misha and Sen, Pradeep}, title = {Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28513-28522} }
Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Shouhang and Li, Chenglin and Jiang, Yuankun and Wei, Li and Kan, Nuowen and Zheng, Ziyang and Dai, Wenrui and Zou, Junni and Xiong, Hongkai}, title = {Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26440-26450} }
Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karmann_2025_CVPR, author = {Karmann, Markus and Urfalioglu, Onay}, title = {Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24518-24528} }
GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zechuan and Yu, Hongshan and Ding, Yihao and Qiao, Jinhao and Azam, Basim and Akhtar, Naveed}, title = {GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27211-27221} }
DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Ziyu and Li, Xiaoguang and Shi, Lingjia and Imanpour, Nasrin and Wang, Song}, title = {DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25346-25356} }
Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xinyue and Dai, Zijia and Xu, Wanting and Kneip, Laurent}, title = {Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27103-27112} }
Dynamic Integration of Task-Specific Adapters for Class Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiashuo and Wang, Shaokun and Qian, Bo and He, Yuhang and Wei, Xing and Wang, Qiang and Gong, Yihong}, title = {Dynamic Integration of Task-Specific Adapters for Class Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30545-30555} }
EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yiming and Kwon, Taein and Streli, Paul and Pollefeys, Marc and Holz, Christian}, title = {EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27727-27738} }
DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morshed_2025_CVPR, author = {Morshed, Mashrur M. and Boddeti, Vishnu}, title = {DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23303-23312} }
MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Jingcheng and Guo, Yuxin and Liu, Yichen and Chen, Rui and Lu, Lewei and Wu, Zehuan}, title = {MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22381-22391} }
3D-MVP: 3D Multiview Pretraining for Manipulation-
[pdf]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Shengyi and Mo, Kaichun and Blukis, Valts and Fouhey, David F. and Fox, Dieter and Goyal, Ankit}, title = {3D-MVP: 3D Multiview Pretraining for Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22530-22539} }
Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeonghyeon and Hwang, Sangheum}, title = {Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29979-29988} }
Mimir: Improving Video Diffusion Models for Precise Text Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Shuai and Gong, Biao and Feng, Yutong and Zheng, Kecheng and Zheng, Dandan and Shi, Shuwei and Shen, Yujun and Chen, Jingdong and Yang, Ming}, title = {Mimir: Improving Video Diffusion Models for Precise Text Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23978-23988} }
UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingyue and Qi, Jiahao and Chen, Chen and Bin, KangCheng and Zhong, Ping}, title = {UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22286-22295} }
GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Soohyun and Kim, Seoyeon and Lee, HeeKyung and Jeong, Won-Sik and Lee, Joo Ho}, title = {GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21138-21147} }
DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Seungjun and Lee, Gim Hee}, title = {DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21739-21749} }
Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives-
[pdf]
[supp]
[bibtex]@InProceedings{Hanson_2025_CVPR, author = {Hanson, Alex and Tu, Allen and Lin, Geng and Singla, Vasu and Zwicker, Matthias and Goldstein, Tom}, title = {Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21537-21546} }
Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Plizzari_2025_CVPR, author = {Plizzari, Chiara and Tonioni, Alessio and Xian, Yongqin and Kulshrestha, Achin and Tombari, Federico}, title = {Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24129-24138} }
ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zetong and Kaufmann, Manuel and Xue, Lixin and Song, Jie and Oswald, Martin R.}, title = {ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21824-21835} }
SpiritSight Agent: Advanced GUI Agent with One Look-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhiyuan and Cheng, Ziming and Pan, Junting and Hou, Zhaohui and Zhan, Mingjie}, title = {SpiritSight Agent: Advanced GUI Agent with One Look}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29490-29500} }
Zero-Shot Monocular Scene Flow Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yiqing and Badki, Abhishek and Su, Hang and Tompkin, James and Gallo, Orazio}, title = {Zero-Shot Monocular Scene Flow Estimation in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21031-21044} }
MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bizhu and Xie, Jinheng and Shen, Keming and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin}, title = {MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27849-27858} }
Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation-
[pdf]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Yuheng and Wen, Changsong and Peng, Zelin and jiaye, Li and Zhu, Siyu}, title = {Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24895-24904} }
MMRL: Multi-Modal Representation Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuncheng and Gu, Xiaodong}, title = {MMRL: Multi-Modal Representation Learning for Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25015-25025} }
Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Jiawei and Wang, Hongxing and Weng, Junwu and Li, Jiaxin and Ou, Zhilong and Dang, Kang}, title = {Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24180-24189} }
Breaking the Low-Rank Dilemma of Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Qihang and Huang, Huaibo and He, Ran}, title = {Breaking the Low-Rank Dilemma of Linear Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25271-25280} }
Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jinpeng and Luo, Tianci and Zha, Yaohua and Feng, Yan and Luo, Ruisheng and Chen, Bin and Dai, Tao and Chen, Long and Wang, Yaowei and Xia, Shu-Tao}, title = {Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25156-25165} }
Unity in Diversity: Video Editing via Gradient-Latent Purification-
[pdf]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Junyu and Yang, Kunlin and Yao, Xuan and Hu, Yufan}, title = {Unity in Diversity: Video Editing via Gradient-Latent Purification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23401-23411} }
Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hongda and Liu, Yunfan and Ren, Min and Wang, Hao and Wang, Yunlong and Sun, Zhenan}, title = {Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29248-29257} }
Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Dages_2025_CVPR, author = {Dag\`es, Thomas and Weber, Simon and Lin, Ya-Wei Eileen and Talmon, Ronen and Cremers, Daniel and Lindenbaum, Michael and Bruckstein, Alfred M. and Kimmel, Ron}, title = {Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25842-25853} }
VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Songhao and Huang, Wei and Shi, Hairong and Zhuo, Le and Su, Xiu and Zhang, Shifeng and Zhou, Xu and Qi, Xiaojuan and Liao, Yue and Liu, Si}, title = {VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26181-26191} }
Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Lahlali_2025_CVPR, author = {Lahlali, Saad and Kara, Sandra and Ammar, Hejer and Chabot, Florian and Granger, Nicolas and Le Borgne, Herv\'e and Pham, Quoc-Cuong}, title = {Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24529-24538} }
Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Chang, Yi and Yan, Luxin}, title = {Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27904-27913} }
Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yanghao and Chen, Long}, title = {Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25560-25569} }
TSP-Mamba: The Travelling Salesman Problem Meets Mamba for Image Super-resolution and Beyond-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Kun and Lin, Xinyu and Lu, Jiangbo}, title = {TSP-Mamba: The Travelling Salesman Problem Meets Mamba for Image Super-resolution and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28134-28143} }
RENO: Real-Time Neural Compression for 3D LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Kang and Chen, Tong and Ding, Dandan and Asif, M. Salman and Ma, Zhan}, title = {RENO: Real-Time Neural Compression for 3D LiDAR Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22172-22181} }
FADE: Frequency-Aware Diffusion Model Factorization for Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yixuan and Wang, Haolin and Ma, Shilin and Zhao, Wenliang and Tang, Yansong and Chen, Lei and Zhou, Jie}, title = {FADE: Frequency-Aware Diffusion Model Factorization for Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28426-28435} }
Data Synthesis with Diverse Styles for Face Recognition via 3DMM-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2025_CVPR, author = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Yuan, Qiuyang and Zhao, Xuan and Xu, Jianqing and Ding, Shouhong and Wang, Shaoming and Guo, Rizen and Zhou, Shuigeng}, title = {Data Synthesis with Diverse Styles for Face Recognition via 3DMM-Guided Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21203-21214} }
Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Yanbiao and Dai, Wei and Huang, Wenke and Chen, Jiayi}, title = {Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20958-20968} }
GlyphMastero: A Glyph Encoder for High-Fidelity Scene Text Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tong and Liu, Ting and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin}, title = {GlyphMastero: A Glyph Encoder for High-Fidelity Scene Text Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28523-28532} }
Birth and Death of a Rose-
[pdf]
[arXiv]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Chen and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun}, title = {Birth and Death of a Rose}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26102-26113} }
MetricGrids: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shu and Gao, Yanbo and Li, Shuai and Lv, Chong and Cai, Xun and Li, Chuankun and Yuan, Hui and Zhang, Jinglin}, title = {MetricGrids: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21381-21391} }
MovieBench: A Hierarchical Movie Level Dataset for Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Weijia and Liu, Mingyu and Zhu, Zeyu and Xia, Xi and Feng, Haoen and Wang, Wen and Lin, Kevin Qinghong and Shen, Chunhua and Shou, Mike Zheng}, title = {MovieBench: A Hierarchical Movie Level Dataset for Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28984-28994} }
Be More Specific: Evaluating Object-centric Realism in Synthetic Images-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Anqi and Corneanu, Ciprian and Feng, Qianli and Giannone, Giorgio and Martinez, Aleix}, title = {Be More Specific: Evaluating Object-centric Realism in Synthetic Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28842-28851} }
Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Lei-Lei and Xu, Shuo and Xie, Ming-Kun and Wang, Lei and Sun, Dengdi and Zhao, Haifeng}, title = {Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25434-25443} }
SFDM: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Daisheng and Hu, Jiangbei and Xu, Baixin and Dai, Yuxin and Qian, Chen and He, Ying}, title = {SFDM: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26409-26419} }
Ouroboros3D: Image-to-3D Generation via 3D-aware Recursive Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Hao and Huang, Zehuan and Wang, Yaohui and Chen, Xinyuan and Sheng, Lu}, title = {Ouroboros3D: Image-to-3D Generation via 3D-aware Recursive Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21631-21641} }
QMambaBSR: Burst Image Super-Resolution with Query State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Di_2025_CVPR, author = {Di, Xin and Peng, Long and Xia, Peizhe and Li, Wenbo and Pei, Renjing and Cao, Yang and Wang, Yang and Zha, Zheng-Jun}, title = {QMambaBSR: Burst Image Super-Resolution with Query State Space Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23080-23090} }
Multi-Group Proportional Representations for Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Sangwon and Oesterling, Alex and Verdun, Claudio Mayrink and Vithana, Sajani and Moon, Taesup and Calmon, Flavio P.}, title = {Multi-Group Proportional Representations for Text-to-Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23744-23754} }
Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Messaoud_2025_CVPR, author = {Messaoud, Kaouther and Cord, Matthieu and Alahi, Alexandre}, title = {Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27564-27574} }
CoMatcher: Multi-View Collaborative Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jintao and Xia, Zimin and Dong, Mingyue and Shen, Shuhan and Yue, Linwei and Zheng, Xianwei}, title = {CoMatcher: Multi-View Collaborative Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21970-21980} }
Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully AI-Generated Content-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kundu_2025_CVPR, author = {Kundu, Rohit and Xiong, Hao and Mohanty, Vishal and Balachandran, Athula and Roy-Chowdhury, Amit K.}, title = {Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully AI-Generated Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28050-28060} }
A Focused Human Body Model for Accurate Anthropometric Measurements Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Shuhang and Huang, Xianliang and Zhong, Zhizhou and Guan, Juhong and Zhou, Shuigeng}, title = {A Focused Human Body Model for Accurate Anthropometric Measurements Extraction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22658-22667} }
ACE: Anti-Editing Concept Erasure in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zihao and Wei, Yuxiang and Li, Fan and Pei, Renjing and Xu, Hang and Zuo, Wangmeng}, title = {ACE: Anti-Editing Concept Erasure in Text-to-Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23505-23515} }
Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Weilong and Li, Ming and Li, Haipeng and Shao, Shuwei and Tan, Robby T.}, title = {Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21880-21890} }
Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qiang and Zhao, Mengsheng and Liu, Jiawei and Zhang, Fanrui and Xu, Yongchao and Zha, Zheng-Jun}, title = {Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30524-30533} }
LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Jian and Yu, Zhenbo and Shen, Yang and Fu, Zhenyong and Yang, Jian}, title = {LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23585-23594} }
DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Ho_2025_CVPR, author = {Ho, Darryl and Madden, Samuel}, title = {DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24023-24032} }
Let's Verify and Reinforce Image Generation Step by Step-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Renrui and Tong, Chengzhuo and Zhao, Zhizheng and Guo, Ziyu and Zhang, Haoquan and Zhang, Manyuan and Liu, Jiaming and Gao, Peng and Li, Hongsheng}, title = {Let's Verify and Reinforce Image Generation Step by Step}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28662-28672} }
All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Xiaoling and Lee, Zhemg and Ye, Wei and Xie, Rui and Zhang, Wenbo and Peng, Guanju and Li, Zongze and Zhang, Shikun}, title = {All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28221-28231} }
UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingyu and Wang, Gu and Zhang, Ruida and Zhang, Chenyangguang and Tombari, Federico and Ji, Xiangyang}, title = {UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22023-22034} }
HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarvestani_2025_CVPR, author = {Sarvestani, Armin Shafiee and Tang, Sheyang and Wang, Zhou}, title = {HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21414-21424} }
SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Yucheng and Wang, Boyang and Kulkarni, Nilesh and Park, Jeong Joon}, title = {SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21620-21630} }
Reversible Decoupling Network for Single Image Reflection Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hao and Li, Mingjia and Hu, Qiming and Guo, Xiaojie}, title = {Reversible Decoupling Network for Single Image Reflection Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26430-26439} }
Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Xinhao and Fang, Hao and Chen, Bin and Gu, Xulin and Qiu, Meikang and Qi, Shuhan and Xia, Shu-Tao}, title = {Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30462-30471} }
GLASS: Guided Latent Slot Diffusion for Object-Centric Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Singh_2025_CVPR, author = {Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan}, title = {GLASS: Guided Latent Slot Diffusion for Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28673-28683} }
SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jinfeng and Li, Xianzhi and Tang, Yuan and Han, Xu and Yu, Qiao and Hao, Yixue and Hu, Long and Chen, Min}, title = {SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27295-27304} }
Low-Biased General Annotated Dataset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Dengyang and Wang, Haoyu and Zhang, Lei and Wei, Wei and Dai, Guang and Wang, Mengmeng and Wang, Jingdong and Zhang, Yanning}, title = {Low-Biased General Annotated Dataset Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25113-25123} }
Generative Hard Example Augmentation for Semantic Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qi and Peng, Jibin and Huang, Zhao and Feng, Wei and Lin, Di}, title = {Generative Hard Example Augmentation for Semantic Point Cloud Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22205-22214} }
ETAP: Event-based Tracking of Any Point-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamann_2025_CVPR, author = {Hamann, Friedhelm and Gehrig, Daniel and Febryanto, Filbert and Daniilidis, Kostas and Gallego, Guillermo}, title = {ETAP: Event-based Tracking of Any Point}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27186-27196} }
Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yaqi and Yin, Yuanyang and Li, Lin and Lin, Mingan and Huang, Victor Shea-Jay and Chen, Siwei and Chen, Weipeng and Yin, Baoqun and Zhou, Zenan and Zhang, Wentao}, title = {Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24950-24959} }
Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Esposito_2025_CVPR, author = {Esposito, Stefano and Chen, Anpei and Reiser, Christian and Bul\`o, Samuel Rota and Porzi, Lorenzo and Schwarz, Katja and Richardt, Christian and Zollh\"ofer, Michael and Kontschieder, Peter and Geiger, Andreas}, title = {Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21370-21380} }
STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Yuning and Wang, Andong and Li, Chao and Huang, Haonan and Zhou, Guoxu and Zhao, Qibin}, title = {STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28640-28650} }
VIRES: Video Instance Repainting via Sketch and Text Guided Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2025_CVPR, author = {Weng, Shuchen and Zheng, Haojie and Zhang, Peixuan and Hong, Yuchen and Jiang, Han and Li, Si and Shi, Boxin}, title = {VIRES: Video Instance Repainting via Sketch and Text Guided Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28416-28425} }
MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Men_2025_CVPR, author = {Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng}, title = {MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21181-21191} }
From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhiwei and Yu, Hailin and Shentu, Yichun and Yuan, Jin and Zhang, Guofeng}, title = {From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27059-27069} }
StableAnimator: High-Quality Identity-Preserving Human Image Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Shuyuan and Xing, Zhen and Han, Xintong and Cheng, Zhi-Qi and Dai, Qi and Luo, Chong and Wu, Zuxuan}, title = {StableAnimator: High-Quality Identity-Preserving Human Image Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21096-21106} }
OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yifeng and Zhu, Lin and Sun, Zewen and Liu, Hengyu and Gu, Qinying and Ye, Nanyang}, title = {OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30630-30639} }
BIMBA: Selective-Scan Compression for Long-Range Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2025_CVPR, author = {Islam, Md Mohaiminul and Nagarajan, Tushar and Wang, Huiyu and Bertasius, Gedas and Torresani, Lorenzo}, title = {BIMBA: Selective-Scan Compression for Long-Range Video Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29096-29107} }
Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Schusterbauer_2025_CVPR, author = {Schusterbauer, Johannes and Gui, Ming and Fundel, Frank and Ommer, Bj\"orn}, title = {Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28347-28357} }
Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruan_2025_CVPR, author = {Ruan, Quanyuan and Lei, Jiabao and Yuan, Wenhao and Zhang, Yanglin and Lu, Dekun and Liu, Guiliang and Jia, Kui}, title = {Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22562-22572} }
DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Hyunwoo and Ryu, Gun and Kim, Wonjun}, title = {DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21600-21609} }
Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behari_2025_CVPR, author = {Behari, Nikhil and Young, Aaron and Somasundaram, Siddharth and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh}, title = {Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26954-26964} }
Novel View Synthesis with Pixel-Space Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elata_2025_CVPR, author = {Elata, Noam and Kawar, Bahjat and Ostrovsky-Berman, Yaron and Farber, Miriam and Sokolovsky, Ron}, title = {Novel View Synthesis with Pixel-Space Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26756-26766} }
Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiao and Jin, Yu and Wu, Wentao and Zhang, Wei and Zhu, Lin and Jiang, Bo and Tian, Yonghong}, title = {Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29321-29330} }
Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jun and Xu, Dannong and Fei, Junjie and Feng, Chun-Mei and Elhoseiny, Mohamed}, title = {Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24817-24826} }
Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Farina_2025_CVPR, author = {Farina, Matteo and Mancini, Massimiliano and Iacca, Giovanni and Ricci, Elisa}, title = {Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29989-29998} }
TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhai_2025_CVPR, author = {Zhai, Zhichao and Chen, Guikun and Wang, Wenguan and Zheng, Dong and Xiao, Jun}, title = {TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21159-21169} }
Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Lihan and Ren, Kerui and Yu, Mulin and Xu, Linning and Dong, Junting and Lu, Tao and Zhao, Feng and Lin, Dahua and Dai, Bo}, title = {Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26789-26799} }
LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table-
[pdf]
[supp]
[bibtex]@InProceedings{Matsui_2025_CVPR, author = {Matsui, Yusuke}, title = {LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30430-30439} }
Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Yuanqi and Liu, Siao and Song, Haoming and Qu, Delin and Chen, Qizhi and Ding, Yan and Zhao, Bin and Wang, Zhigang and Li, Xuelong and Wang, Dong}, title = {Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22573-22583} }
SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Seokju and Chae, Seunghye and Lee, Dongheon and Ro, Youngmin}, title = {SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25602-25612} }
Ref-GS: Directional Factorization for 2D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Youjia and Chen, Anpei and Wan, Yumin and Song, Zikai and Yu, Junqing and Luo, Yawei and Yang, Wei}, title = {Ref-GS: Directional Factorization for 2D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26483-26492} }
VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanaka_2025_CVPR, author = {Tanaka, Ryota and Iki, Taichi and Hasegawa, Taku and Nishida, Kyosuke and Saito, Kuniko and Suzuki, Jun}, title = {VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24827-24837} }
Concept Lancet: Image Editing with Compositional Representation Transplant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Jinqi and Ding, Tianjiao and Chan, Kwan Ho Ryan and Min, Hancheng and Callison-Burch, Chris and Vidal, Rene}, title = {Concept Lancet: Image Editing with Compositional Representation Transplant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28502-28512} }
Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Seungtae and Sun, Xiangyu and Kang, Gyeongjin and Lee, Younggeun and Oh, Seungjun and Park, Eunbyung}, title = {Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26683-26693} }
Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Chaoyou and Dai, Yuhan and Luo, Yongdong and Li, Lei and Ren, Shuhuai and Zhang, Renrui and Wang, Zihan and Zhou, Chenyu and Shen, Yunhang and Zhang, Mengdan and Chen, Peixian and Li, Yanwei and Lin, Shaohui and Zhao, Sirui and Li, Ke and Xu, Tong and Zheng, Xiawu and Chen, Enhong and Shan, Caifeng and He, Ran and Sun, Xing}, title = {Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24108-24118} }
Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Zebin and Zhang, Xinyu and Guo, Hanzhong and Wang, Jingdong and Li, Chongxuan}, title = {Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28790-28800} }
Understanding Multi-layered Transmission Matrices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Levin_2025_CVPR, author = {Levin, Anat and Alterman, Marina}, title = {Understanding Multi-layered Transmission Matrices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23164-23173} }
GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Weikang and Huang, Zhaoyang and Shi, Xiaoyu and Li, Yijin and Wang, Fu-Yun and Li, Hongsheng}, title = {GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21717-21727} }
AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR, author = {Yun, Kwan and Hong, Seokhyeon and Kim, Chaelin and Noh, Junyong}, title = {AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27838-27848} }
Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Qi and Ding, Hu}, title = {Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25728-25737} }
DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jing and Fu, Yihang and Chen, Falai}, title = {DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21438-21447} }
Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Zilyu and Chen, Zhiyang and Li, Tiancheng and Huang, Zemin and Luo, Weijian and Qi, Guo-Jun}, title = {Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23412-23422} }
Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Boseung and Park, Jicheol and Kim, Sungyeon and Kwak, Suha}, title = {Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26202-26211} }
TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hongxiang and Liu, Xingchen and Xu, Mutian and Hao, Yiming and Chen, Weikai and Han, Xiaoguang}, title = {TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27683-27693} }
NoT: Federated Unlearning via Weight Negation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khalil_2025_CVPR, author = {Khalil, Yasser H. and Brunswic, Leo and Lamghari, Soufiane and Li, Xu and Beitollahi, Mahdi and Chen, Xi}, title = {NoT: Federated Unlearning via Weight Negation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25759-25769} }
RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhakal_2025_CVPR, author = {Dhakal, Aayush and Sastry, Srikumar and Khanal, Subash and Ahmad, Adeel and Xing, Eric and Jacobs, Nathan}, title = {RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24680-24689} }
SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhengyuan and Cheng, Kai and Ghosh, Anindita and Bhattacharya, Uttaran and Gui, Liangyan and Bera, Aniket}, title = {SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27827-27837} }
From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ziang and Zhang, Hongguang and Wang, Juan and Chen, Meihui and Hu, Hongxin and Yi, Wenzhe and Xu, Xiaoyang and Yang, Mengda and Ma, Chenjun}, title = {From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29288-29298} }
SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhenglin and Hu, Jinwei and Li, Xiangtai and He, Yiwei and Zhao, Xingyu and Peng, Bei and Wu, Baoyuan and Huang, Xiaowei and Cheng, Guangliang}, title = {SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28831-28841} }
Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaoqi and Xu, Jingyun and Zhang, Mingxu and Liu, Jiaming and Shen, Yan and Ponomarenko, Iaroslav and Xu, Jiahui and Heng, Liang and Huang, Siyuan and Zhang, Shanghang and Dong, Hao}, title = {Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27638-27648} }
Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuliang and Garg, Sparsh and Miangoleh, S. Mahdi H. and Huang, Xinyu and Ren, Liu}, title = {Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26996-27006} }
ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kara_2025_CVPR, author = {Kara, Ozgur and Singh, Krishna Kumar and Liu, Feng and Ceylan, Duygu and Rehg, James M. and Hinz, Tobias}, title = {ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28405-28415} }
Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Sihong and Wu, Jiaxin and Wei, Xiaoyong and Cai, Yi and Jiang, Dongmei and Wang, Yaowei}, title = {Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28942-28951} }
OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2025_CVPR, author = {Ouyang, Linke and Qu, Yuan and Zhou, Hongbin and Zhu, Jiawei and Zhang, Rui and Lin, Qunshu and Wang, Bin and Zhao, Zhiyuan and Jiang, Man and Zhao, Xiaomeng and Shi, Jin and Wu, Fan and Chu, Pei and Liu, Minghao and Li, Zhenxiang and Xu, Chao and Zhang, Bo and Shi, Botian and Tu, Zhongying and He, Conghui}, title = {OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24838-24848} }
LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Fan-Yun and Liu, Weiyu and Gu, Siyi and Lim, Dylan and Bhat, Goutam and Tombari, Federico and Li, Manling and Haber, Nick and Wu, Jiajun}, title = {LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29469-29478} }
Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Changshuo and He, Shuting and Fang, Xiang and Han, Jiawei and Liu, Zhonghang and Ning, Xin and Li, Weijun and Tiwari, Prayag}, title = {Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22182-22192} }
Faster Parameter-Efficient Tuning with Token Redundancy Reduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Kwonyoung and Park, Jungin and Kim, Jin and Kwon, Hyeongjun and Sohn, Kwanghoon}, title = {Faster Parameter-Efficient Tuning with Token Redundancy Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30189-30198} }
Panorama Generation From NFoV Image Done Right-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Dian and Zhang, Cheng and Wu, Xiao-Ming and Li, Cao and Lv, Chengfei and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Panorama Generation From NFoV Image Done Right}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21610-21619} }
Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Changfeng and Bi, Ran and Guo, Jie and Wang, Chongjun and Guo, Yanwen}, title = {Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27285-27294} }
Distilling Monocular Foundation Model for Fine-grained Depth Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying}, title = {Distilling Monocular Foundation Model for Fine-grained Depth Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22254-22265} }
AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Stier_2025_CVPR, author = {Stier, Noah and Rich, Alex and Sen, Pradeep and H\"ollerer, Tobias}, title = {AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21814-21823} }
Less Attention is More: Prompt Transformer for Generalized Category Discovery-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wei and Zhang, Baopeng and Teng, Zhu and Luo, Wenxin and Zou, Junnan and Fan, Jianping}, title = {Less Attention is More: Prompt Transformer for Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30322-30331} }
AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Haonan and Wu, Xiangzuo and Liao, Huan and Xu, Zunnan and Hu, Zhongyuan and Li, Ronghui and Zhang, Yachao and Li, Xiu}, title = {AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22746-22755} }
DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Liao and Liu, Tianqi and Sun, Huiqiang and Li, Jiaqi and Cao, Zhiguo and Li, Wei and Loy, Chen Change}, title = {DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26462-26471} }
Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chong and Guo, Lanqing and Fu, Zixuan and Yang, Siyuan and Cheng, Hao and Kot, Alex C. and Wen, Bihan}, title = {Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23207-23216} }
Hierarchical Flow Diffusion for Efficient Frame Interpolation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hai_2025_CVPR, author = {Hai, Yang and Wang, Guo and Su, Tan and Jiang, Wenjie and Hu, Yinlin}, title = {Hierarchical Flow Diffusion for Efficient Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22943-22952} }
BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Yulu and Zhang, Ce and Bertasius, Gedas}, title = {BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28952-28962} }
Arbitrary-steps Image Super-resolution via Diffusion Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Zongsheng and Liao, Kang and Loy, Chen Change}, title = {Arbitrary-steps Image Super-resolution via Diffusion Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23153-23163} }
Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nizamani_2025_CVPR, author = {Nizamani, Awais and Laga, Hamid and Wang, Guanjin and Boussaid, Farid and Bennamoun, Mohammed and Srivastava, Anuj}, title = {Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21783-21792} }
ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Xiangyuan and Lu, Zeyu and Huang, Di and Wang, Zidong and Ouyang, Wanli and Bai, Lei}, title = {ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24614-24624} }
Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Xu and Yin, Jun and Wen, Jie}, title = {Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30722-30731} }
AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiong and Zhang, Lechen and Lee, Kwansoo and Ning, Jialong and Goldfeder, Judah and Lipson, Hod}, title = {AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27628-27637} }
Golden Cudgel Network for Real-Time Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Guoyu and Wang, Yuan and Shi, Daming and Wang, Yanzhong}, title = {Golden Cudgel Network for Real-Time Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25367-25376} }
Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Rao_2025_CVPR, author = {Rao, Jiahua and Lin, Hanjing and Chen, Leyu and Xie, Jiancong and Zheng, Shuangjia and Yang, Yuedong}, title = {Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30752-30762} }
R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2025_CVPR, author = {Sheng, Lijun and Liang, Jian and Wang, Zilei and He, Ran}, title = {R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29958-29967} }
SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Go_2025_CVPR, author = {Go, Hyojun and Park, Byeongjun and Jang, Jiho and Kim, Jin-Young and Kwon, Soonwoo and Kim, Changick}, title = {SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21524-21536} }
Boltzmann Attention Sampling for Image Analysis with Small Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Theodore and Kiblawi, Sid and Usuyama, Naoto and Lee, Ho Hin and Preston, Sam and Poon, Hoifung and Wei, Mu}, title = {Boltzmann Attention Sampling for Image Analysis with Small Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25950-25959} }
Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Monroy_2025_CVPR, author = {Monroy, Brayan and Bacca, Jorge and Tachella, Juli\'an}, title = {Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28155-28164} }
Dynamic Motion Blending for Versatile Motion Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Nan and Li, Hongjie and Yuan, Ziye and He, Zimo and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan}, title = {Dynamic Motion Blending for Versatile Motion Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22735-22745} }
StdGEN: Semantic-Decomposed 3D Character Generation from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Yuze and Zhou, Yanning and Zhao, Wang and Wu, Zhongkai and Xiao, Kaiwen and Yang, Wei and Liu, Yong-Jin and Han, Xiao}, title = {StdGEN: Semantic-Decomposed 3D Character Generation from Single Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26345-26355} }
Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jingyi and Chen, Xieyuanli and Ma, Junyi and Huang, Jiawei and Xu, Jintao and Wang, Yue and Pei, Ling}, title = {Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22338-22347} }
FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin}, title = {FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30261-30270} }
Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Shu_2025_CVPR, author = {Shu, Yan and Liu, Zheng and Zhang, Peitian and Qin, Minghao and Zhou, Junjie and Liang, Zhengyang and Huang, Tiejun and Zhao, Bo}, title = {Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26160-26169} }
Sonata: Self-Supervised Learning of Reliable Point Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Xiaoyang and DeTone, Daniel and Frost, Duncan and Shen, Tianwei and Xie, Chris and Yang, Nan and Engel, Jakob and Newcombe, Richard and Zhao, Hengshuang and Straub, Julian}, title = {Sonata: Self-Supervised Learning of Reliable Point Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22193-22204} }
DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Hongbin and Guo, Zilu and Zhang, Yifan and Niu, Shuaicheng and Li, Yafeng and Zhang, Ruimao and Cui, Shuguang and Li, Zhen}, title = {DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27497-27507} }
DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Mingze and Chen, Junhao and Dong, Junting and Chen, Yurun and Jiang, Xinyu and Mao, Shiwei and Jiang, Puhua and Wang, Jingbo and Dai, Bo and Huang, Ruqi}, title = {DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21170-21180} }
A Unified Approach to Interpreting Self-supervised Pre-training Methods for 3D Point Clouds via Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Qiang and Ruan, Jian and Wu, Fanghao and Chen, Yuchi and Wei, Zhihua and Shen, Wen},
  title     = {A Unified Approach to Interpreting Self-supervised Pre-training Methods for {3D} Point Clouds via Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27315--27324},
}
Enhancing SAM with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Konwer_2025_CVPR,
  author    = {Konwer, Aishik and Yang, Zhijian and Bas, Erhan and Xiao, Cao and Prasanna, Prateek and Bhatia, Parminder and Kass-Hout, Taha},
  title     = {Enhancing {SAM} with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20990--21000},
}
STEREO: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Srivatsan_2025_CVPR,
  author    = {Srivatsan, Koushik and Shamshad, Fahad and Naseer, Muzammal and Patel, Vishal M. and Nandakumar, Karthik},
  title     = {{STEREO}: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23765--23774},
}
GenVDM: Generating Vector Displacement Maps From a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuezhi and Chen, Qimin and Kim, Vladimir G. and Chaudhuri, Siddhartha and Huang, Qixing and Chen, Zhiqin},
  title     = {{GenVDM}: Generating Vector Displacement Maps From a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26618--26629},
}
Effective SAM Combination for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Jungho and Yang, Sunghun and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
  title     = {Effective {SAM} Combination for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26081--26090},
}
Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Wenqiao and Gu, Yao and Chen, Xintao and Xu, Xiaohao and Hu, Ming and Huang, Xiaonan and Wu, Yingna},
  title     = {Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30409--30419},
}
UniAP: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hao and Wu, Ke and Li, Jie and Li, Jun and Li, Wu-Jun},
  title     = {{UniAP}: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20947--20957},
}
Turbo3D: Ultra-fast Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Hanzhe and Yin, Tianwei and Luan, Fujun and Hu, Yiwei and Tan, Hao and Xu, Zexiang and Bi, Sai and Tulsiani, Shubham and Zhang, Kai},
  title     = {{Turbo3D}: Ultra-fast Text-to-{3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23668--23678},
}
SUM Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Weixiao and Nan, Liangliang and Ledoux, Hugo},
  title     = {{SUM Parts}: Benchmarking Part-Level Semantic Segmentation of Urban Meshes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24474--24484},
}
MODA: Motion-Drift Augmentation for Inertial Human Motion Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yinghao and Guo, Shihui and Qin, Yipeng},
  title     = {{MODA}: Motion-Drift Augmentation for Inertial Human Motion Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27771--27781},
}
Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Roetzer_2025_CVPR,
  author    = {Roetzer, Paul and Ehm, Viktoria and Cremers, Daniel and L{\"a}hner, Zorah and Bernard, Florian},
  title     = {Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21793--21803},
}
Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Luke and Wang, Junyao and Mortlock, Trier and Khargonekar, Pramod and Al Faruque, Mohammad Abdullah},
  title     = {Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22306--22316},
}
GIFStream: 4D Gaussian-based Immersive Video with Feature Stream-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Li, Sicheng and Gao, Xiang and Batuer, Abudouaihati and Yu, Lu and Liao, Yiyi},
  title     = {{GIFStream}: {4D} {Gaussian}-based Immersive Video with Feature Stream},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21761--21770},
}
Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in LiDAR Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelsamad_2025_CVPR,
  author    = {Abdelsamad, Mohamed and Ulrich, Michael and Glaeser, Claudius and Valada, Abhinav},
  title     = {Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in {LiDAR} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22234--22243},
}
PLeaS - Merging Models with Permutations and Least Squares-
[pdf]
[supp]
[bibtex]@InProceedings{Nasery_2025_CVPR,
  author    = {Nasery, Anshul and Hayase, Jonathan and Koh, Pang Wei and Oh, Sewoong},
  title     = {{PLeaS} - Merging Models with Permutations and Least Squares},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30493--30502},
}
Incremental Object Keypoint Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Mingfu and Zhou, Jiahuan and Zou, Xu and Wu, Ying},
  title     = {Incremental Object Keypoint Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25399--25410},
}
InteractVLM: 3D Interaction Reasoning from 2D Foundational Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dwivedi_2025_CVPR,
  author    = {Dwivedi, Sai Kumar and Anti{\'c}, Dimitrije and Tripathi, Shashank and Taheri, Omid and Schmid, Cordelia and Black, Michael J. and Tzionas, Dimitrios},
  title     = {{InteractVLM}: {3D} Interaction Reasoning from {2D} Foundational Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22605--22615},
}
Attribute-Missing Multi-view Graph Clustering-
[pdf]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Bowen and Wang, Qianqian and Ding, Zhengming and Gao, Quanxue},
  title     = {Attribute-Missing Multi-view Graph Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25832--25841},
}
Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Kaixin and Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Liao, Jianxin},
  title     = {Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22627--22637},
}
ReDiffDet: Rotation-equivariant Diffusion Model for Oriented Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jiaqi and Ding, Zeyu and Zhou, Yong and Zhu, Hancheng and Du, Wen-Liang and Yao, Rui},
  title     = {{ReDiffDet}: Rotation-equivariant Diffusion Model for Oriented Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24429--24439},
}
Unlocking Generalization Power in LiDAR Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Zhenxuan and Wu, Qiao and Zhang, Xiyu and Wu, Lin Yuanbo and An, Pei and Yang, Jiaqi and Wang, Ji and Wang, Peng},
  title     = {Unlocking Generalization Power in {LiDAR} Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22244--22253},
}
LoRA Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned LoRAs-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Zixuan and Wei, Yongxian and Shen, Li and Yuan, Chun and Tao, Dacheng},
  title     = {{LoRA Recycle}: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned {LoRAs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25026--25037},
}
Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Ziyuan and Zou, Zihao and Boominathan, Vivek and Chakravarthula, Praneeth and Pediredla, Adithya},
  title     = {Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26910--26920},
}
HyperFree: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jingtao and Liu, Yingyi and Wang, Xinyu and Peng, Yunning and Sun, Chen and Wang, Shaoyu and Sun, Zhendong and Ke, Tian and Jiang, Xiao and Lu, Tangwei and Zhao, Anran and Zhong, Yanfei},
  title     = {{HyperFree}: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23048--23058},
}
GBlobs: Explicit Local Structure via Gaussian Blobs for Improved Cross-Domain LiDAR-based 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Malic_2025_CVPR,
  author    = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst},
  title     = {{GBlobs}: Explicit Local Structure via {Gaussian} Blobs for Improved Cross-Domain {LiDAR}-based {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27357--27367},
}
3D Gaussian Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yating and Wang, Xuan and Yi, Ran and Fan, Yanbo and Hu, Jichen and Zhu, Jingcheng and Ma, Lizhuang},
  title     = {{3D} {Gaussian} Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21117--21126},
}
MambaIRv2: Attentive State Space Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hang and Guo, Yong and Zha, Yaohua and Zhang, Yulun and Li, Wenbo and Dai, Tao and Xia, Shu-Tao and Li, Yawei},
  title     = {{MambaIRv2}: Attentive State Space Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28124--28133},
}
Floating No More: Object-Ground Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2025_CVPR,
  author    = {Man, Yunze and Sheng, Yichen and Zhang, Jianming and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Floating No More: Object-Ground Reconstruction from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27134--27143},
}
Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganeshan_2025_CVPR,
  author    = {Ganeshan, Aditya and Groueix, Thibault and Guerrero, Paul and Mech, Radomir and Fisher, Matthew and Ritchie, Daniel},
  title     = {Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28715--28725},
}
STAR-Edge: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zikuan and Chen, Honghua and Wang, Yuecheng and Wu, Sibo and Wei, Mingqiang and Wang, Jun},
  title     = {{STAR-Edge}: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27254--27263},
}
Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation-
[pdf]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Rong and Liu, Xingyu and Shi, Jinglei and Lin, Liang and Yang, Jufeng},
  title     = {Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25960--25970},
}
pFedMxF: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yifei and Zhu, Hao and Tan, Alysa Ziying and Yu, Dianzhi and Huang, Longtao and Yu, Han},
  title     = {{pFedMxF}: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30640--30650},
}
Efficient Transfer Learning for Video-language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoxing and Huang, Zizheng and Hong, Yan and Wang, Yanshuo and Lyu, Zhongcai and Xu, Zhuoer and Lan, Jun and Gu, Zhangxuan},
  title     = {Efficient Transfer Learning for Video-language Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29129--29138},
}
Radio Frequency Ray Tracing with Neural Object Representation for Enhanced RF Modeling-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xingyu and Feng, Zihao and Qian, Kun and Zhang, Xinyu},
  title     = {Radio Frequency Ray Tracing with Neural Object Representation for Enhanced {RF} Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21339--21348},
}
Neuro-3D: Towards 3D Visual Decoding from EEG Signals-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhanqiang and Wu, Jiamin and Song, Yonghao and Bu, Jiahui and Mai, Weijian and Zheng, Qihao and Ouyang, Wanli and Song, Chunfeng},
  title     = {{Neuro-3D}: Towards {3D} Visual Decoding from {EEG} Signals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23870--23880},
}
Probing the Mid-level Vision Capabilities of Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xuweiyi and Marks, Markus and Cheng, Zezhou},
  title     = {Probing the Mid-level Vision Capabilities of Self-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30095--30105},
}
Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Huiwon and Yu, Sihyun and Shin, Jinwoo and Abbeel, Pieter and Seo, Younggyo},
  title     = {Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22853--22863},
}
Derivative-Free Diffusion Manifold-Constrained Gradient for Unified XAI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Won Jun and Chung, Hyungjin and Kim, Jaemin and Lee, Sangmin and Sim, Byeongsu and Ye, Jong Chul},
  title     = {Derivative-Free Diffusion Manifold-Constrained Gradient for Unified {XAI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23795--23805},
}
ZoomLDM: Latent Diffusion Model for Multi-scale Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yellapragada_2025_CVPR,
  author    = {Yellapragada, Srikar and Graikos, Alexandros and Triaridis, Kostas and Prasanna, Prateek and Gupta, Rajarsi and Saltz, Joel and Samaras, Dimitris},
  title     = {{ZoomLDM}: Latent Diffusion Model for Multi-scale Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23453--23463},
}
GaussianUDF: Inferring Unsigned Distance Functions through 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Shujuan and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{GaussianUDF}: Inferring Unsigned Distance Functions through {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27113--27123},
}
CrossSDF: 3D Reconstruction of Thin Structures From Cross-Sections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Walker_2025_CVPR,
  author    = {Walker, Thomas and Esposito, Salvatore and Rebain, Daniel and Vaxman, Amir and Onken, Arno and Li, Changjian and Mac Aodha, Oisin},
  title     = {{CrossSDF}: {3D} Reconstruction of Thin Structures From Cross-Sections},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30928--30937},
}
DV-Matcher: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhangquan and Jiang, Puhua and Huang, Ruqi},
  title     = {{DV-Matcher}: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27264--27274},
}
Reasoning Mamba: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuxuan and Wu, Aming and Yang, Muli and Min, Yukuan and Zhu, Yihang and Deng, Cheng},
  title     = {Reasoning {Mamba}: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27618--27627},
}
Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Qiyuan and Huang, Hanzhuo and Wu, Yu and Yang, Sibei},
  title     = {Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25444--25453},
}
FLAIR: VLM with Fine-grained Language-informed Image Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Rui and Kim, Sanghwan and Georgescu, Mariana-Iuliana and Akata, Zeynep and Alaniz, Stephan},
  title     = {{FLAIR}: {VLM} with Fine-grained Language-informed Image Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24884--24894},
}
GG-SSMs: Graph-Generating State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zubic_2025_CVPR,
  author    = {Zubic, Nikola and Scaramuzza, Davide},
  title     = {{GG-SSMs}: Graph-Generating State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28863--28873},
}
Continuous Adverse Weather Removal via Degradation-Aware Distillation-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Xin and Xiao, Jie and Zhu, Yurui and Fu, Xueyang},
  title     = {Continuous Adverse Weather Removal via Degradation-Aware Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28113--28123},
}
Exploiting Temporal State Space Sharing for Video Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hesham_2025_CVPR,
  author    = {Hesham, Syed Ariff Syed and Liu, Yun and Sun, Guolei and Ding, Henghui and Yang, Jing and Konukoglu, Ender and Geng, Xue and Jiang, Xudong},
  title     = {Exploiting Temporal State Space Sharing for Video Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24211--24221},
}
High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yiyang and Zhou, Kun and Wang, He and Yang, Yin and Shao, Tianjia},
  title     = {High-fidelity {3D} Object Generation from Single Image with {RGBN}-Volume {Gaussian} Reconstruction Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21558--21569},
}
Steepest Descent Density Control for Compact 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Peihao and Wang, Yuehao and Wang, Dilin and Mohan, Sreyas and Fan, Zhiwen and Wu, Lemeng and Cai, Ruisi and Yeh, Yu-Ying and Wang, Zhangyang and Liu, Qiang and Ranjan, Rakesh},
  title     = {Steepest Descent Density Control for Compact {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26663--26672},
}
Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhuowei and Zhao, Tianchen and Xu, Xiang and Zhang, Zheng and Li, Zhihua and Chen, Xuanbai and Zhang, Qin and Bergamo, Alessandro and Jain, Anil K. and Xing, Yifan},
  title     = {Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24351--24363},
}
Robust 3D Shape Reconstruction in Zero-Shot from a Single Image in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Junhyeong and Youwang, Kim and Yang, Hunmin and Oh, Tae-Hyun},
  title     = {Robust {3D} Shape Reconstruction in Zero-Shot from a Single Image in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22786--22798},
}
BOE-ViT: Boosting Orientation Estimation with Equivariance in Self-Supervised 3D Subtomogram Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Runmin and Daggett, Jackson and Pingulkar, Shriya and Zhao, Yizhou and Dhingra, Priyanshu and Brown, Daniel and Wu, Qifeng and Zeng, Xiangrui and Li, Xingjian and Xu, Min},
  title     = {{BOE-ViT}: Boosting Orientation Estimation with Equivariance in Self-Supervised {3D} Subtomogram Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29352--29362},
}
Adventurer: Optimizing Vision Mamba Architecture Designs for Efficiency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feng and Yang, Timing and Yu, Yaodong and Ren, Sucheng and Wei, Guoyizhe and Wang, Angtian and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang},
  title     = {Adventurer: Optimizing Vision {Mamba} Architecture Designs for Efficiency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30157--30166},
}
Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caldarola_2025_CVPR,
  author    = {Caldarola, Debora and Cagnasso, Pietro and Caputo, Barbara and Ciccone, Marco},
  title     = {Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25187--25197},
}
Parameterized Blur Kernel Prior Learning for Local Motion Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Zhenxuan and Wu, Fangfang and Huang, Tao and Dong, Le and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Parameterized Blur Kernel Prior Learning for Local Motion Deblurring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23006--23015},
}
Scene4U: Hierarchical Layered 3D Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zilong and He, Jun and Ye, Junyan and Jiang, Lihan and Li, Weijia and Chen, Yiping and Han, Ting},
  title     = {{Scene4U}: Hierarchical Layered {3D} Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26723--26733},
}
ACAttack: Adaptive Cross Attacking RGB-T Tracker via Multi-Modal Response Decoupling-
[pdf]
[bibtex]@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Xinyu and Yan, Qinglong and Zhang, Hao and Ma, Jiayi},
  title     = {{ACAttack}: Adaptive Cross Attacking {RGB-T} Tracker via Multi-Modal Response Decoupling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22099--22108},
}
DeCafNet: Delegate and Conquer for Efficient Temporal Grounding in Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Zijia and Iftekhar, A S M and Mittal, Gaurav and Meng, Tianjian and Wang, Xiawei and Zhao, Cheng and Kukkala, Rohith and Elhamifar, Ehsan and Chen, Mei},
  title     = {{DeCafNet}: Delegate and Conquer for Efficient Temporal Grounding in Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24066--24076},
}
HoGS: Unified Near and Far Object Reconstruction via Homogeneous Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinpeng and Huang, Zeyi and Okura, Fumio and Matsushita, Yasuyuki},
  title     = {{HoGS}: Unified Near and Far Object Reconstruction via Homogeneous {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26714--26722},
}
SmartEraser: Remove Anything from Images using Masked-Region Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Longtao and Wang, Zhendong and Bao, Jianmin and Zhou, Wengang and Chen, Dongdong and Shi, Lei and Chen, Dong and Li, Houqiang},
  title     = {{SmartEraser}: Remove Anything from Images using Masked-Region Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24452--24462},
}
Sample- and Parameter-Efficient Auto-Regressive Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Amrani_2025_CVPR,
  author    = {Amrani, Elad and Karlinsky, Leonid and Bronstein, Alex},
  title     = {Sample- and Parameter-Efficient Auto-Regressive Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30127--30136},
}
Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Chen and Li, Peike and Yang, Liying and Wang, Dadong and Li, Lincheng and Yu, Xin},
  title     = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28922--28931},
}
BiLoRA: Almost-Orthogonal Parameter Spaces for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Hao and Zhang, Yifei and Dong, Junhao and Koniusz, Piotr},
  title     = {{BiLoRA}: Almost-Orthogonal Parameter Spaces for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25613--25622},
}
Vid2Sim: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Chuhao and Dou, Zhiyang and Wang, Chen and Huang, Yiming and Chen, Anjun and Feng, Qiao and Gu, Jiatao and Liu, Lingjie},
  title     = {{Vid2Sim}: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26545--26555},
}
SceneTAP: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Yue and Xing, Yun and Zhang, Jie and Lin, Di and Zhang, Tianwei and Tsang, Ivor and Liu, Yang and Guo, Qing},
  title     = {{SceneTAP}: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25050--25059},
}
Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zigeng and Ma, Xinyin and Fang, Gongfan and Wang, Xinchao}, title = {Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23334--23344} }
AerialMegaDepth: Learning Aerial-Ground Reconstruction and View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuong_2025_CVPR, author = {Vuong, Khiem and Ghosh, Anurag and Ramanan, Deva and Narasimhan, Srinivasa and Tulsiani, Shubham}, title = {{AerialMegaDepth}: Learning Aerial-Ground Reconstruction and View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21674--21684} }
Visual Representation Learning through Causal Intervention for Controllable Image Editing-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Wang, Lei and Liao, Guorui and Gong, Zhili and Yang, Huayi and Liu, Li}, title = {Visual Representation Learning through Causal Intervention for Controllable Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23484--23493} }
Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Bingda and Zheng, Boyang and Paul, Sayak and Xie, Saining}, title = {Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28586--28595} }
A Comprehensive Study of Decoder-Only LLMs for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Andrew Z. and Ge, Songwei and Karras, Tero and Liu, Ming-Yu and Balaji, Yogesh}, title = {A Comprehensive Study of Decoder-Only {LLMs} for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28575--28585} }
Deformable Radial Kernel Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yi-Hua and Lin, Ming-Xian and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan}, title = {Deformable Radial Kernel Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21513--21523} }
Bayesian Prompt Flow Learning for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Chen, Qiyu and Zhang, Zhengtao and Wang, Xingang and Ding, Guiguang}, title = {{Bayesian} Prompt Flow Learning for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30398--30408} }
HalLoc: Token-level Localization of Hallucinations for Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Eunkyu and Kim, Minyeong and Kim, Gunhee}, title = {{HalLoc}: Token-level Localization of Hallucinations for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29893--29903} }
DiffPortrait360: Consistent Portrait Diffusion for 360 View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuming and Tran, Phong and Zheng, Yujian and Xu, Hongyi and Li, Heyuan and Karmanov, Adilbek and Li, Hao}, title = {{DiffPortrait360}: Consistent Portrait Diffusion for 360 View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26263--26273} }
SURGEON: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Ke and Tang, Jiaqi and Guo, Bin and Dang, Fan and Liu, Sicong and Zhu, Zhui and Wu, Lei and Fang, Cheng and Chen, Ying-Cong and Yu, Zhiwen and Liu, Yunhao}, title = {{SURGEON}: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30514--30523} }
From Slow Bidirectional to Fast Autoregressive Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Tianwei and Zhang, Qiang and Zhang, Richard and Freeman, William T. and Durand, Fredo and Shechtman, Eli and Huang, Xun}, title = {From Slow Bidirectional to Fast Autoregressive Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22963--22974} }
Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Boming and Li, Chunxiao and Wang, Xiaoxiao and Zhang, Andi and Sun, Rui and Wang, Zizhe and Zhu, Yao}, title = {Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23575--23584} }
MonoInstance: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Yang, Yixiao and Huang, Han and Han, Liang and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {{MonoInstance}: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21642--21653} }
CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Rundi and Gao, Ruiqi and Poole, Ben and Trevithick, Alex and Zheng, Changxi and Barron, Jonathan T. and Holynski, Aleksander}, title = {{CAT4D}: Create Anything in {4D} with Multi-View Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26057--26068} }
Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Liao, Xiangyu and Li, Mingyang and Guo, Guihuan and Ren, Chao}, title = {Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28176--28187} }
Distilling Long-tailed Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhenghao and Wang, Haoxuan and Shang, Yuzhang and Wang, Kai and Yan, Yan}, title = {Distilling Long-tailed Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30609--30618} }
Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2025_CVPR, author = {Ryan, Fiona and Bati, Ajay and Lee, Sangmin and Bolya, Daniel and Hoffman, Judy and Rehg, James M.}, title = {{Gaze-LLE}: Gaze Target Estimation via Large-Scale Learned Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28874--28884} }
Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Yuhao and Zu, Xinxing and Zhang, Wenhua and Zhao, Zhongzhou and Gao, Jinyang}, title = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29733--29743} }
Boost Your Human Image Generation Model via Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2025_CVPR, author = {Na, Sanghyeon and Kim, Yonggyu and Lee, Hyunjoon}, title = {Boost Your Human Image Generation Model via Direct Preference Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23551--23562} }
Learning to Highlight Audio by Watching Movies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chao and Gao, Ruohan and Tsang, J. M. F. and Kurcius, Jan and Bilen, Cagdas and Xu, Chenliang and Kumar, Anurag and Parekh, Sanjeel}, title = {Learning to Highlight Audio by Watching Movies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23925--23935} }
Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Capellera_2025_CVPR, author = {Capellera, Guillem and Rubio, Antonio and Ferraz, Luis and Agudo, Antonio}, title = {Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22476--22486} }
WeGen: A Unified Model for Interactive Multimodal Generation as We Chat-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhipeng and Zhuang, Shaobin and Fu, Canmiao and Yang, Binxin and Zhang, Ying and Sun, Chong and Zhang, Zhizheng and Wang, Yali and Li, Chen and Zha, Zheng-Jun}, title = {{WeGen}: A Unified Model for Interactive Multimodal Generation as We Chat}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23679--23689} }
HRAvatar: High-Quality and Relightable Gaussian Head Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Chen, Kangjie and Qin, Minghan and Li, Yu and Wang, Haoqian}, title = {{HRAvatar}: High-Quality and Relightable {Gaussian} Head Avatar}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26285--26296} }
A Distractor-Aware Memory for Visual Object Tracking with SAM2-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Videnovic_2025_CVPR, author = {Videnovic, Jovana and Lukezic, Alan and Kristan, Matej}, title = {A Distractor-Aware Memory for Visual Object Tracking with {SAM2}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24255--24264} }
Activating Sparse Part Concepts for 3D Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Zhenya and Xiao, Jun and Liu, Lupeng and Jiang, Haiyong}, title = {Activating Sparse Part Concepts for {3D} Class Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30343--30353} }
ProxyTransformation: Preshaping Point Cloud Manifold With Proxy Attention For 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Qihang and Zheng, Henry and Huang, Gao}, title = {{ProxyTransformation}: Preshaping Point Cloud Manifold With Proxy Attention For {3D} Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24582--24592} }
BFANet: Revisiting 3D Semantic Segmentation with Boundary Feature Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Weiguang and Zhang, Rui and Wang, Qiufeng and Cheng, Guangliang and Huang, Kaizhu}, title = {{BFANet}: Revisiting {3D} Semantic Segmentation with Boundary Feature Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29395--29405} }
Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Hairui and Tang, Fan and Zhao, He and Wang, Zixuan and Guo, Dandan and Chang, Yi}, title = {Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25135--25144} }
Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Dongkwan and Hwang, Kyomin and Kwak, Nojun}, title = {Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30599--30608} }
Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Han and Wang, Gang and Zhang, Huan}, title = {Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29947--29957} }
Neural LightRig: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Zexin and Wang, Tengfei and Huang, Xin and Pan, Xingang and Liu, Ziwei}, title = {Neural {LightRig}: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26514--26524} }
Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Hao and Qin, Xugong and Yang, Jun Jie Ou and Zhang, Peng and Zeng, Gangyan and Li, Yubo and Lin, Hailun}, title = {Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29722--29732} }
Mitigating Ambiguities in 3D Classification with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Ruiqi and Zhu, Hao and Zhao, Jingyi and Zhang, Qi and Cao, Xun and Ma, Zhan}, title = {Mitigating Ambiguities in {3D} Classification with {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27275--27284} }
DH-Set: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Kun and Li, Jingyu and Li, Zhe and Zhou, S. Kevin}, title = {{DH-Set}: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24993--25003} }
Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Pengcheng and Jiang, Boyuan and Hu, Xiaobin and Luo, Donghao and He, Qingdong and Zhang, Jiangning and Wang, Chengjie and Wu, Yunsheng and Ling, Charles and Wang, Boyu}, title = {Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28479--28489} }
DyCON: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Assefa_2025_CVPR, author = {Assefa, Maregu and Naseer, Muzammal and Ganapathi, Iyyakutti Iyappan and Ali, Syed Sadaf and Seghier, Mohamed L. and Werghi, Naoufel}, title = {{DyCON}: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30850--30860} }
DUNE: Distilling a Universal Encoder from Heterogeneous 2D and 3D Teachers-
[pdf]
[supp]
[bibtex]@InProceedings{Sariyildiz_2025_CVPR, author = {Sar{\i}y{\i}ld{\i}z, Mert B{\"u}lent and Weinzaepfel, Philippe and Lucas, Thomas and de Jorge, Pau and Larlus, Diane and Kalantidis, Yannis}, title = {{DUNE}: Distilling a Universal Encoder from Heterogeneous {2D} and {3D} Teachers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30084--30094} }
Black Hole-Driven Identity Absorbing in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Shaheryar_2025_CVPR, author = {Shaheryar, Muhammad and Lee, Jong Taek and Jung, Soon Ki}, title = {Black Hole-Driven Identity Absorbing in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28544--28554} }
HiRes-LLaVA: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Runhui and Ding, Xinpeng and Wang, Chunwei and Han, Jianhua and Liu, Yulong and Zhao, Hengshuang and Xu, Hang and Hou, Lu and Zhang, Wei and Liang, Xiaodan}, title = {{HiRes-LLaVA}: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29814--29824} }
Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Jiahao and Li, Hui and Zhan, Yun and Shang, Hanlin and Cheng, Kaihui and Ma, Yuqi and Mu, Shan and Zhou, Hang and Wang, Jingdong and Zhu, Siyu}, title = {{Hallo3}: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21086--21095} }
SeqMvRL: A Sequential Fusion Framework for Multi-view Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ren and Sun, Haoliang and Lin, Yuxiu and Zuo, Chuanhui and Gong, Yongshun and Yin, Yilong and Meng, Wenjia}, title = {{SeqMvRL}: A Sequential Fusion Framework for Multi-view Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25822--25831} }
BadToken: Token-level Backdoor Attacks to Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Zenghui and Shi, Jiawen and Zhou, Pan and Gong, Neil Zhenqiang and Sun, Lichao}, title = {{BadToken}: Token-level Backdoor Attacks to Multi-modal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29927--29936} }
VLMs-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Haoran and Peng, Peixi and Tan, Guang and Chang, Yiqian and Li, Luntong and Tian, Yonghong}, title = {{VLMs}-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29534--29544} }
NeISF++: Neural Incident Stokes Field for Polarized Inverse Rendering of Conductors and Dielectrics-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chenhao and Ono, Taishi and Uemori, Takeshi and Nitta, Sho and Mihara, Hajime and Gatto, Alexander and Nagahara, Hajime and Moriuchi, Yusuke}, title = {{NeISF++}: Neural Incident {Stokes} Field for Polarized Inverse Rendering of Conductors and Dielectrics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26493--26503} }
Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Wang and Wang, QingSong and Feng, Yueying and Wang, Shulei and Jin, Tao and Zhao, Zhou and Wu, Fei and Yao, Chang and Chen, Jingyuan}, title = {Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29756--29766} }
Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Kaihang and Lin, Wang and Yue, Zhongqi and Ao, Tenglong and Jia, Liyu and Zhao, Wei and Li, Juncheng and Tang, Siliang and Zhang, Hanwang}, title = {Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26136--26146} }
SplatFlow: Self-Supervised Dynamic Gaussian Splatting in Neural Motion Flow Field for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Su and Zhao, Cheng and Sun, Zhuoyang and Chen, Yingjie Victor and Chen, Mei}, title = {{SplatFlow}: Self-Supervised Dynamic {Gaussian} Splatting in Neural Motion Flow Field for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27487--27496} }
AesthetiQ: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patnaik_2025_CVPR, author = {Patnaik, Sohan and Jain, Rishabh and Krishnamurthy, Balaji and Sarkar, Mausoom}, title = {{AesthetiQ}: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23701--23711} }
FINECAPTION: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2025_CVPR, author = {Hua, Hang and Liu, Qing and Zhang, Lingzhi and Shi, Jing and Kim, Soo Ye and Zhang, Zhifei and Wang, Yilin and Zhang, Jianming and Lin, Zhe and Luo, Jiebo}, title = {{FINECAPTION}: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24763--24773} }
Chebyshev Attention Depth Permutation Texture Network with Latent Texture Attribute Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Evani_2025_CVPR, author = {Evani, Ravishankar and Rajan, Deepu and Mao, Shangbo}, title = {{Chebyshev} Attention Depth Permutation Texture Network with Latent Texture Attribute Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23423--23432} }
Decentralized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{McAllister_2025_CVPR, author = {McAllister, David and Tancik, Matthew and Song, Jiaming and Kanazawa, Angjoo}, title = {Decentralized Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23323--23333} }
AnyEdit: Mastering Unified High-Quality Image Editing for Any Idea-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Qifan and Chow, Wei and Yue, Zhongqi and Pan, Kaihang and Wu, Yang and Wan, Xiaoyang and Li, Juncheng and Tang, Siliang and Zhang, Hanwang and Zhuang, Yueting}, title = {{AnyEdit}: Mastering Unified High-Quality Image Editing for Any Idea}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26125--26135} }
DNF: Unconditional 4D Generation with Dictionary-based Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xinyi and Li, Naiqi and Dai, Angela}, title = {{DNF}: Unconditional {4D} Generation with Dictionary-based Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26047--26056} }
ARM: Appearance Reconstruction Model for Relightable 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Xiang and Yu, Chang and Bi, Zoubin and Shang, Yintong and Gao, Feng and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin}, title = {{ARM}: Appearance Reconstruction Model for Relightable {3D} Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21425--21437} }
Ground-V: Teaching VLMs to Ground Complex Instructions in Pixels-
[pdf]
[supp]
[bibtex]@InProceedings{Zong_2025_CVPR, author = {Zong, Yongshuo and Zhang, Qin and An, Dongsheng and Li, Zhihua and Xu, Xiang and Xu, Linghan and Tu, Zhuowen and Xing, Yifan and Dabeer, Onkar}, title = {{Ground-V}: Teaching {VLMs} to Ground Complex Instructions in Pixels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24635--24645} }
TreeMeshGPT: Artistic Mesh Generation with Autoregressive Tree Sequencing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lionar_2025_CVPR, author = {Lionar, Stefan and Liang, Jiabin and Lee, Gim Hee}, title = {{TreeMeshGPT}: Artistic Mesh Generation with Autoregressive Tree Sequencing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26608--26617} }
Generating 3D-Consistent Videos from Unposed Internet Photos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chou_2025_CVPR, author = {Chou, Gene and Zhang, Kai and Bi, Sai and Tan, Hao and Xu, Zexiang and Luan, Fujun and Hariharan, Bharath and Snavely, Noah}, title = {Generating {3D}-Consistent Videos from Unposed Internet Photos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27934--27945} }
Parameter Efficient Mamba Tuning via Projector-targeted Diagonal-centric Linear Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2025_CVPR, author = {Ham, Seokil and Kim, Hee-Seon and Woo, Sangmin and Kim, Changick}, title = {Parameter Efficient {Mamba} Tuning via Projector-targeted Diagonal-centric Linear Transformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30106--30115} }
ViUniT: Visual Unit Tests for More Robust Visual Programming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Panagopoulou_2025_CVPR, author = {Panagopoulou, Artemis and Zhou, Honglu and Savarese, Silvio and Xiong, Caiming and Callison-Burch, Chris and Yatskar, Mark and Niebles, Juan Carlos}, title = {{ViUniT}: Visual Unit Tests for More Robust Visual Programming}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24646--24656} }
DualTalk: Dual-Speaker Interaction for 3D Talking Head Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Ziqiao and Fan, Yanbo and Wu, Haoyu and Wang, Xuan and Liu, Hongyan and He, Jun and Fan, Zhaoxin}, title = {{DualTalk}: Dual-Speaker Interaction for {3D} Talking Head Conversations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21055--21064} }
beta-FFT: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Ming and Yin, Jianfu and Ma, Zhuangzhuang and Ma, Jianheng and Zhu, Feiyu and Wu, Bingbing and Wen, Ya and Wu, Meng and Hu, Cong and Hu, Bingliang and Wang, Quan}, title = {{beta-FFT}: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30839--30849} }
Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics-
[pdf]
[supp]
[bibtex]@InProceedings{Smadar_2025_CVPR, author = {Smadar, Yair and Hoogi, Assaf}, title = {Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30167--30177} }
SynerGen-VL: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hao and Tian, Changyao and Shao, Jie and Zhu, Xizhou and Wang, Zhaokai and Zhu, Jinguo and Dou, Wenhan and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng}, title = {{SynerGen-VL}: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29767--29779} }
Uncertain Multimodal Intention and Emotion Understanding in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Qu and Shi, Qinghongya and Wang, Tongxin and Ye, Mang}, title = {Uncertain Multimodal Intention and Emotion Understanding in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24700--24709} }
VidTwin: Video VAE with Decoupled Structure and Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuchi and Guo, Junliang and Xie, Xinyi and He, Tianyu and Sun, Xu and Bian, Jiang}, title = {{VidTwin}: Video {VAE} with Decoupled Structure and Dynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22922--22932} }
CL-LoRA: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Jiangpeng and Duan, Zhihao and Zhu, Fengqing}, title = {{CL-LoRA}: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30534--30544} }
Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Feng and Liu, Ruiyang and Liu, Chen and He, Gaofeng and Li, Yong-Lu and Jin, Xiaogang and Wang, Huamin}, title = {Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23712-23722} }
Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_CVPR, author = {Kwon, Joohyun and Cho, Hanbyel and Kim, Junmo}, title = {Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26855-26865} }
Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Zhengyi and Bao, Weidong and Wang, Ji and Zhang, Shuai and Zhou, Jingxuan and Lyu, Lingjuan and Lim, Wei Yang Bryan}, title = {Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30661-30670} }
SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kai and Zhao, Xiaodong and Huang, Yujie and Fang, Guoyu and Song, Xiao and Wang, Ruiping and Wang, Ziyuan}, title = {SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22465-22475} }
HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving-
[pdf]
[supp]
[bibtex]@InProceedings{Raswa_2025_CVPR, author = {Raswa, Farchan Hakim and Lu, Chun-Shien and Wang, Jia-Ching}, title = {HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30251-30260} }
SGSST: Scaling Gaussian Splatting Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Galerne_2025_CVPR, author = {Galerne, Bruno and Wang, Jianling and Raad, Lara and Morel, Jean-Michel}, title = {SGSST: Scaling Gaussian Splatting Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26535-26544} }
Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noda_2025_CVPR, author = {Noda, Takeshi and Chen, Chao and Zhou, Junsheng and Zhang, Weiqi and Liu, Yu-Shen and Han, Zhizhong}, title = {Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22139-22149} }
Balancing Two Classifiers via A Simplex ETF Structure for Model Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Jiani and Zhao, He and Gao, Jintong and Guo, Dandan and Zha, Hongyuan}, title = {Balancing Two Classifiers via A Simplex ETF Structure for Model Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30712-30721} }
DAMM-Diffusion: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Wang, Shouju and Tang, Yuxia and Zhu, Qi and Zhang, Daoqiang and Shao, Wei}, title = {DAMM-Diffusion: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30886-30895} }
U-Know-DiffPAN: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for PAN-Sharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sungpyo and Do, Jeonghyeok and Lee, Jaehyup and Kim, Munchurl}, title = {U-Know-DiffPAN: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for PAN-Sharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23069-23079} }
RelationField: Relate Anything in Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koch_2025_CVPR, author = {Koch, Sebastian and Wald, Johanna and Colosi, Mirco and Vaskevicius, Narunas and Hermosilla, Pedro and Tombari, Federico and Ropinski, Timo}, title = {RelationField: Relate Anything in Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21706-21716} }
Let Humanoids Hike! Integrative Skill Development on Complex Trails-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kwan-Yee and Yu, Stella X.}, title = {Let Humanoids Hike! Integrative Skill Development on Complex Trails}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22498-22507} }
BF-STVSR: B-Splines and Fourier—Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Eunjin and Kim, Hyeonjin and Jin, Kyong Hwan and Yoo, Jaejun}, title = {BF-STVSR: B-Splines and Fourier---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28009-28018} }
DIO: Decomposable Implicit 4D Occupancy-Flow World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Diehl_2025_CVPR, author = {Diehl, Christopher and Sykora, Quinlan and Agro, Ben and Gilles, Thomas and Casas, Sergio and Urtasun, Raquel}, title = {DIO: Decomposable Implicit 4D Occupancy-Flow World Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27456-27466} }
SLADE: Shielding against Dual Exploits in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hossain_2025_CVPR, author = {Hossain, Md Zarif and Imteaj, Ahmed}, title = {SLADE: Shielding against Dual Exploits in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24244-24254} }
Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jian and Dabral, Rishabh and Luvizon, Diogo and Cao, Zhe and Liu, Lingjie and Beeler, Thabo and Theobalt, Christian}, title = {Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22668-22679} }
FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Jiangtong and Yu, Hu and Huang, Jie and Xiao, Jie and Zhao, Feng}, title = {FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27979-27988} }
Mind the Time: Temporally-Controlled Multi-Event Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Ziyi and Siarohin, Aliaksandr and Menapace, Willi and Skorokhodov, Ivan and Fang, Yuwei and Chordia, Varnith and Gilitschenski, Igor and Tulyakov, Sergey}, title = {Mind the Time: Temporally-Controlled Multi-Event Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23989-24000} }
Audio-Visual Semantic Graph Network for Audio-Visual Event Localization-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Liang and Li, Shuaiyong and Zhu, Yongqiang}, title = {Audio-Visual Semantic Graph Network for Audio-Visual Event Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23957-23966} }
Video Motion Transfer with Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pondaven_2025_CVPR, author = {Pondaven, Alexander and Siarohin, Aliaksandr and Tulyakov, Sergey and Torr, Philip and Pizzati, Fabio}, title = {Video Motion Transfer with Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22911-22921} }
Unified Reconstruction of Static and Dynamic Scenes from Events-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Qiyao and Duan, Peiqi and Lou, Hanyue and Teng, Minggui and Cai, Ziqi and Chen, Xu and Shi, Boxin}, title = {Unified Reconstruction of Static and Dynamic Scenes from Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27914-27923} }
Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Zhuoran and You, Shaodi and Cheng, Cheng and Wei, Shikui}, title = {Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28081-28090} }
Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Wei and Zhao, Chenyang and Chan, Antoni B.}, title = {Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29363-29373} }
Move-in-2D: 2D-Conditioned Human Motion Generation-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Hsin-Ping and Zhou, Yang and Wang, Jui-Hsien and Liu, Difan and Liu, Feng and Yang, Ming-Hsuan and Xu, Zhan}, title = {Move-in-2D: 2D-Conditioned Human Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22766-22775} }
MATCHA: Towards Matching Anything-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Fei and Elflein, Sven and Leal-Taix{\'e}, Laura and Zhou, Qunjie}, title = {MATCHA: Towards Matching Anything}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27081-27091} }
CTRL-D: Controllable Dynamic 3D Scene Editing with Personalized 2D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Kai and Wu, Chin-Hsuan and Gilitschenski, Igor}, title = {CTRL-D: Controllable Dynamic 3D Scene Editing with Personalized 2D Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26630-26640} }
Separation of Powers: On Segregating Knowledge from Observation in LLM-enabled Knowledge-based Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhen and Tao, Zhuo and Chen, Qi and Li, Liang and Qi, Yuankai and van den Hengel, Anton and Huang, Qingming}, title = {Separation of Powers: On Segregating Knowledge from Observation in LLM-enabled Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24753-24762} }
SF2T: Self-supervised Fragment Finetuning of Video-LLMs for Fine-Grained Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Yangliu and Song, Zikai and Feng, Na and Luo, Yawei and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei}, title = {SF2T: Self-supervised Fragment Finetuning of Video-LLMs for Fine-Grained Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29108-29117} }
Fitted Neural Lossless Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhe and Chen, Zhenzhong and Liu, Shan}, title = {Fitted Neural Lossless Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23249-23258} }
JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Yunlong and Lin, Zixu and Chen, Haoyu and Pan, Panwang and Li, Chenxin and Chen, Sixiang and Wen, Kairun and Jin, Yeying and Li, Wenbo and Ding, Xinghao}, title = {JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22369-22380} }
F-LMM: Grounding Frozen Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Size and Jin, Sheng and Zhang, Wenwei and Xu, Lumin and Liu, Wentao and Li, Wei and Loy, Chen Change}, title = {F-LMM: Grounding Frozen Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24710-24721} }
EntityErasure: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yixing and Zhang, Qing and Wang, Yitong and Nie, Yongwei and Zheng, Wei-Shi}, title = {EntityErasure: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28274-28283} }
Joint Out-of-Distribution Filtering and Data Discovery Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schmidt_2025_CVPR, author = {Schmidt, Sebastian and Schenk, Leonard and Schwinn, Leo and G{\"u}nnemann, Stephan}, title = {Joint Out-of-Distribution Filtering and Data Discovery Active Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25677-25687} }
Finding Local Diffusion Schrodinger Bridge using Kolmogorov-Arnold Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Li, Fanding and Liang, Dong and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo}, title = {Finding Local Diffusion Schrodinger Bridge using Kolmogorov-Arnold Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23227-23236} }
CorrBEV: Multi-View 3D Object Detection by Correlation Learning with Multi-modal Prototypes-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Ziteng and Guo, Mingzhe and Fan, Heng and Zhang, Shihui and Zhang, Zhipeng}, title = {CorrBEV: Multi-View 3D Object Detection by Correlation Learning with Multi-modal Prototypes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27413-27423} }
Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Zhiqiang and Wang, Zhengxue and Wang, Kun and Li, Jun and Yang, Jian}, title = {Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26943-26953} }
Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dufour_2025_CVPR, author = {Dufour, Nicolas and Kalogeiton, Vicky and Picard, David and Landrieu, Loic}, title = {Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23016-23026} }
Real-time High-fidelity Gaussian Human Avatars with Position-based Interpolation of Spatially Distributed MLPs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_CVPR, author = {Zhan, Youyi and Shao, Tianjia and Yang, Yin and Zhou, Kun}, title = {Real-time High-fidelity Gaussian Human Avatars with Position-based Interpolation of Spatially Distributed MLPs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26297-26307} }
RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Haisheng and Song, Feixiang and Ma, Cong and Wu, Wei and Yan, Junchi}, title = {RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27446-27455} }
DEFOM-Stereo: Depth Foundation Model Based Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Hualie and Lou, Zhiqiang and Ding, Laiyan and Xu, Rui and Tan, Minglang and Jiang, Wenjie and Huang, Rui}, title = {DEFOM-Stereo: Depth Foundation Model Based Stereo Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21857-21867} }
DiskVPS: Vanishing Point Detector via Hough Transform in a Disk Region-
[pdf]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jianping}, title = {DiskVPS: Vanishing Point Detector via Hough Transform in a Disk Region}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27049-27058} }
Seeing Far and Clearly: Mitigating Hallucinations in MLLMs with Attention Causal Decoding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Feilong and Liu, Chengzhi and Xu, Zhongxing and Hu, Ming and Huang, Zile and Xue, Haochen and Chen, Ziyang and Peng, Zelin and Yang, Zhiwei and Zhou, Sijin and Li, Wenxue and Li, Yulong and Song, Wenxuan and Su, Shiyan and Feng, Wei and Su, Jionglong and Lin, Mingquan and Peng, Yifan and Cheng, Xuelian and Razzak, Imran and Ge, Zongyuan}, title = {Seeing Far and Clearly: Mitigating Hallucinations in MLLMs with Attention Causal Decoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26147-26159} }
Towards Autonomous Micromobility through Scalable Urban Simulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Wayne and He, Honglin and Zhang, Chaoyuan and He, Jack and Zhao, Seth Z. and Gong, Ran and Li, Quanyi and Zhou, Bolei}, title = {Towards Autonomous Micromobility through Scalable Urban Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27553-27563} }
Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Na and Song, Xuemeng and Dong, Xue and Ghosh, Aashish Nikhil and Nie, Liqiang and Zimmermann, Roger}, title = {Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25708-25717} }
EdgeMovingNet: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Xie, Junyuan and Guo, Jie and Guo, Yanwen}, title = {EdgeMovingNet: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22150-22160} }
Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maniparambil_2025_CVPR, author = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and Narayan, Sanath and Singh, Ankit and O'Connor, Noel E.}, title = {Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29847-29857} }
Feature Information Driven Position Gaussian Distribution Estimation for Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Jinghao and Feng, Mingtao and Dong, Weisheng and Wu, Fangfang and Luo, Jianqiao and Wang, Yaonan and Shi, Guangming}, title = {Feature Information Driven Position Gaussian Distribution Estimation for Tiny Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30376-30386} }
Enhancing Diversity for Data-free Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Kai and Zhuang, Zhihao and Zhang, Miao and Guo, Chenjuan and Shu, Yang and Yang, Bin}, title = {Enhancing Diversity for Data-free Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20969-20978} }
From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bouniot_2025_CVPR, author = {Bouniot, Quentin and Redko, Ievgen and Mallasto, Anton and Laclau, Charlotte and Struckmeier, Oliver and Arndt, Karol and Heinonen, Markus and Kyrki, Ville and Kaski, Samuel}, title = {From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25250-25260} }
Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images-
[pdf]
[arXiv]
[bibtex]@InProceedings{Medghalchi_2025_CVPR, author = {Medghalchi, Yasamin and Heidari, Moein and Allard, Clayton and Sigal, Leonid and Hacihaliloglu, Ilker}, title = {Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28564-28574} }
COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Jinqi and Sang, Shen and Zhi, Tiancheng and Liu, Jing and Yan, Qing and Luo, Linjie and Yuan, Bo}, title = {COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30116-30126} }
Gyro-based Neural Single Image Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Heemin and Rim, Jaesung and Lee, Seungyong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Gyro-based Neural Single Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23111-23120} }
Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Hariat_2025_CVPR, author = {Hariat, Marwane and Manzanera, Antoine and Filliat, David}, title = {Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21868-21879} }
Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zeqing and Ma, Qingyang and Wan, Wentao and Li, Haojie and Wang, Keze and Tian, Yonghong}, title = {Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21226-21237} }
Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuhui and Su, Yuchang and Liu, Yiming and Wang, Xiaohan and Burgess, James and Sui, Elaine and Wang, Chenyu and Aklilu, Josiah and Lozano, Alejandro and Wei, Anjiang and Schmidt, Ludwig and Yeung-Levy, Serena}, title = {Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29580-29590} }
ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence-
[pdf]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yuan and Li, Yongxiang and Ren, Zhenwen and Duan, Guiduo and Peng, Dezhong and Hu, Peng}, title = {ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30732-30741} }
Towards In-the-wild 3D Plane Reconstruction from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiachen and Yu, Rui and Chen, Sili and Huang, Sharon X. and Guo, Hengkai}, title = {Towards In-the-wild 3D Plane Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27027-27037} }
PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poesina_2025_CVPR, author = {Poesina, Eduard and Costache, Adriana Valentina and Chifu, Adrian-Gabriel and Mothe, Josiane and Ionescu, Radu Tudor}, title = {PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28651-28661} }
CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Antony_2025_CVPR, author = {Antony, Mariamma and Porana, Rajiv and Lathiya, Sahil M. and Kakileti, Siva Teja and Bhattacharyya, Chiranjib}, title = {CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25887-25896} }
Degradation-Aware Feature Perturbation for All-in-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Xiangpeng and Liao, Xiangyu and Liu, Xiao and Li, Meng and Ren, Chao}, title = {Degradation-Aware Feature Perturbation for All-in-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28165-28175} }
GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rajagopalan_2025_CVPR, author = {Rajagopalan, Sudarshan and Nair, Nithin Gopalakrishnan and Paranjape, Jay N. and Patel, Vishal M.}, title = {GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28144-28154} }
The Power of Context: How Multimodality Improves Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Kangfu and Talebi, Hossein and Ardakani, Mojtaba and Patel, Vishal M. and Milanfar, Peyman and Delbracio, Mauricio}, title = {The Power of Context: How Multimodality Improves Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23141-23152} }
Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zhaohu and Liu, Lihao and Yang, Yijun and Wang, Hongqiu and Ye, Tian and Chen, Sixiang and Li, Wenxue and Liu, Guang and Zhu, Lei}, title = {Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25476-25486} }
4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wanhua and Zhou, Renping and Zhou, Jiawei and Song, Yingwei and Herter, Johannes and Qin, Minghan and Huang, Gao and Pfister, Hanspeter}, title = {4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22001-22011} }
MotionMap: Representing Multimodality in Human Pose Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hosseininejad_2025_CVPR, author = {Hosseininejad, Reyhaneh and Shukla, Megh and Saadatnejad, Saeed and Salzmann, Mathieu and Alahi, Alexandre}, title = {MotionMap: Representing Multimodality in Human Pose Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22680-22689} }
Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yue and Fan, Ningjing and Skorokhodov, Ivan and Voynov, Oleg and Ignatyev, Savva and Burnaev, Evgeny and Wonka, Peter and Wang, Yiqun}, title = {Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21317-21327} }
GaussianSpa: An "Optimizing-Sparsifying" Simplification Framework for Compact and High-Quality 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yangming and Jia, Wenqi and Niu, Wei and Yin, Miao}, title = {GaussianSpa: An ``Optimizing-Sparsifying'' Simplification Framework for Compact and High-Quality 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26673-26682} }
Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Wenhuan and Ji, Yi and Zhu, Guiqian and Ying, Li and Liu, Chunping}, title = {Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29448-29457} }
VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lei and Wei, Yuancheng and Xie, Zhihui and Yang, Xuqing and Song, Yifan and Wang, Peiyi and An, Chenxin and Liu, Tianyu and Li, Sujian and Lin, Bill Yuchen and Kong, Lingpeng and Liu, Qi}, title = {VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24657-24668} }
ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Yun and Fermoselle, Leonor and Ta, Duy and Bucher, Bernadette and Carlone, Luca and Wang, Jiuguang}, title = {ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29458-29468} }
Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yan and Zeng, Zequn and Zhang, Hao and Ding, Yucheng and Wang, Yi and Wang, Zhengjue and Chen, Bo and Liu, Hongwei}, title = {Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30199-30209} }
RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Mingfei and Ma, Liang and Zhumakhanova, Kamila and Radionova, Ekaterina and Zhang, Jingyi and Chang, Xiaojun and Liang, Xiaodan and Laptev, Ivan}, title = {RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27586-27596} }
Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2025_CVPR, author = {Ko, Hanbin and Park, Chang-Min}, title = {Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25897-25906} }
A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zheng and Yin, Guanchun and Zhang, Bo and Liu, Wu and Zhou, Xiuzhuang and Wang, Wendong}, title = {A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25940-25949} }
FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Haokun and Li, Hang and Zhang, Yao and Bi, Jinhe and Zhang, Gengyuan and Zhang, Yueqi and Torr, Philip and Gu, Jindong and Krompass, Denis and Tresp, Volker}, title = {FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30440-30450} }
GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Weihang and Xu, Hongli and Huang, Junwen and Jung, Hyunjun and Yu, Peter KT and Navab, Nassir and Busam, Benjamin}, title = {GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27154-27165} }
Learning from Neighbors: Category Extrapolation for Long-Tail Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Shizhen and Wen, Xin and Liu, Jiahui and Ma, Chuofan and Yuan, Chunfeng and Qi, Xiaojuan}, title = {Learning from Neighbors: Category Extrapolation for Long-Tail Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30483-30492} }
Material Anything: Generating Materials for Any 3D Object via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Xin and Wang, Tengfei and Liu, Ziwei and Wang, Qing}, title = {Material Anything: Generating Materials for Any 3D Object via Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26556-26565} }
ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Haoyuan and Li, Xiaoou and Lv, Jiaming and Cheng, Xianjun and Wang, Qilong and Li, Peihua}, title = {ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30020-30031} }
Continuous Locomotive Crowd Behavior Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2025_CVPR, author = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon}, title = {Continuous Locomotive Crowd Behavior Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22416-22431} }
Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Beier and Cui, Jiequan and Zhang, Hanwang and Zhang, Chi}, title = {Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25487-25496} }
Implicit Bias Injection Attacks against Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Huayang and Jin, Xiangye and Miao, Jiaxu and Wu, Yu}, title = {Implicit Bias Injection Attacks against Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28779-28789} }
ROICtrl: Boosting Instance Control for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuchao and Zhou, Yipin and Ye, Yunfan and Nie, Yixin and Yu, Licheng and Ma, Pingchuan and Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {ROICtrl: Boosting Instance Control for Visual Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23658-23667} }
Cropper: Vision-Language Model for Image Cropping through In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Seung Hyun and Jiang, Jijun and Xu, Yiran and Li, Zhuofang and Ke, Junjie and Li, Yinxiao and He, Junfeng and Hickson, Steven and Datsenko, Katie and Kim, Sangpil and Yang, Ming-Hsuan and Essa, Irfan and Yang, Feng}, title = {Cropper: Vision-Language Model for Image Cropping through In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30010-30019} }
ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Shunlin and Wang, Jingbo and Lu, Zeyu and Chen, Ling-Hao and Dai, Wenxun and Dong, Junting and Dou, Zhiyang and Dai, Bo and Zhang, Ruimao}, title = {ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27872-27882} }
ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cendra_2025_CVPR, author = {Cendra, Fernando Julio and Han, Kai}, title = {ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23734-23743} }
ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Junchao and Deng, Ruining and Yao, Tianyuan and Xiong, Juming and Qu, Chongyu and Guo, Junlin and Lu, Siqi and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Yang, Haichun and Huo, Yuankai}, title = {ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30829-30838} }
MultiMorph: On-demand Atlas Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abulnaga_2025_CVPR, author = {Abulnaga, S. Mazdak and Hoopes, Andrew and Dey, Neel and Hoffmann, Malte and Fischl, Bruce and Guttag, John and Dalca, Adrian}, title = {MultiMorph: On-demand Atlas Construction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30906-30917} }
Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Suo_2025_CVPR, author = {Suo, Wei and Zhang, Lijun and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning}, title = {Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29904-29914} }
Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yufei and Liu, Xiaode and Chen, Yuanpei and Peng, Weihang and Zhang, Yuhan and Ma, Zhe}, title = {Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24398-24408} }
MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Ruijie and Chen, Yixin and Ni, Junfeng and Jia, Baoxiong and Liu, Yu and Wan, Diwen and Zeng, Gang and Huang, Siyuan}, title = {MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26767-26778} }
Symbolic Representation for Any-to-Any Generative Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jiaqi and Zhu, Xiaoye and Wang, Yue and Liu, Tianyang and Chen, Xinhui and Chen, Ying and Leong, Chak Tou and Ke, Yifei and Liu, Joseph and Yuan, Yiwen and McAuley, Julian and Li, Li-jia}, title = {Symbolic Representation for Any-to-Any Generative Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27816-27826} }
Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Haitong and Gao, Kuofeng and Bai, Yang and Li, Jinmin and Shan, Jinxiao and Dai, Tao and Xia, Shu-Tao}, title = {Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24056-24065} }
MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Ziyang and Yu, Yang and Chen, Yucheng and Yang, Xulei and Yeo, Si Yong}, title = {MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29744-29755} }
ArticulatedGS: Self-supervised Digital Twin Modeling of Articulated Objects using 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Junfu and Xin, Yu and Liu, Gaoyi and Xu, Kai and Liu, Ligang and Hu, Ruizhen}, title = {ArticulatedGS: Self-supervised Digital Twin Modeling of Articulated Objects using 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27144-27153} }
Leveraging 3D Geometric Priors in 2D Rotation Symmetry Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2025_CVPR, author = {Seo, Ahyun and Cho, Minsu}, title = {Leveraging 3D Geometric Priors in 2D Rotation Symmetry Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22109-22118} }
Noise Calibration and Spatial-Frequency Interactive Network for STEM Image Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Zhang, Tao and Fu, Ying}, title = {Noise Calibration and Spatial-Frequency Interactive Network for STEM Image Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21287-21296} }
Homogeneous Dynamics Space for Heterogeneous Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xinpeng and Liang, Junxuan and Zhang, Chenshuo and Cai, Zixuan and Lu, Cewu and Li, Yong-Lu}, title = {Homogeneous Dynamics Space for Heterogeneous Humans}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27782-27793} }
TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Yoon Gyo and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew Beng Jin and Camps, Octavia}, title = {TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25539-25548} }
Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Siwei and Fei, Ben and Yang, Weidong and Ling, Fenghua and Chen, Hao and Liu, Zili and Chen, Kun and Fan, Hang and Ouyang, Wanli and Bai, Lei}, title = {Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28071-28080} }
Reconstructing People, Places, and Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2025_CVPR, author = {M{\"u}ller, Lea and Choi, Hongsuk and Zhang, Anthony and Yi, Brent and Malik, Jitendra and Kanazawa, Angjoo}, title = {Reconstructing People, Places, and Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21948-21958} }
InPO: Inversion Preference Optimization with Reparametrized DDIM for Efficient Diffusion Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yunhong and Wang, Qichao and Cao, Hengyuan and Wang, Xierui and Xu, Xiaoyin and Zhang, Min}, title = {InPO: Inversion Preference Optimization with Reparametrized DDIM for Efficient Diffusion Model Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28629-28639} }
Identifying and Mitigating Spurious Correlation in Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chai_2025_CVPR, author = {Chai, Junyi and Lu, Shenyu and Wang, Xiaoqian}, title = {Identifying and Mitigating Spurious Correlation in Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25698-25707} }
Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosal_2025_CVPR, author = {Ghosal, Soumya Suvra and Chakraborty, Souradip and Singh, Vaibhav and Guan, Tianrui and Wang, Mengdi and Beirami, Ahmad and Huang, Furong and Velasquez, Alvaro and Manocha, Dinesh and Bedi, Amrit Singh}, title = {Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25038-25049} }
CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jungsoo and Das, Debasmit and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih}, title = {CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25176-25186} }
PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_CVPR, author = {Pei, Mingzhi and Cao, Xu and Wang, Xiangyi and Guo, Heng and Ma, Zhanyu}, title = {PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26834-26843} }
LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jiamin and Liu, Kenkun and Gao, Han and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei}, title = {LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26641-26651} }
Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Wei-Jin and Li, Yuan-Ming and Xia, Zhi-Wei and Tang, Yu-Ming and Lin, Kun-Yu and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27794-27804} }
A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Akshay_2025_CVPR, author = {Akshay, Shilhora and Narasimhan, Niveditha Lakshmi and George, Jacob and Balasubramanian, Vineeth N}, title = {A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25528-25538} }
MambaVision: A Hybrid Mamba-Transformer Vision Backbone-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hatamizadeh_2025_CVPR, author = {Hatamizadeh, Ali and Kautz, Jan}, title = {MambaVision: A Hybrid Mamba-Transformer Vision Backbone}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25261-25270} }
Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Songsong and Yang, Xi and Wang, Nannan}, title = {Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30241-30250} }
Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features-
[pdf]
[bibtex]@InProceedings{Xiangli_2025_CVPR, author = {Xiangli, Yuanbo and Cai, Ruojin and Chen, Hanyu and Byrne, Jeffrey and Snavely, Noah}, title = {Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27166-27175} }
Learnable Infinite Taylor Gaussian for Dynamic View Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Bingbing and Li, Yanyan and Xie, Rui and Xu, Bo and Dong, Haoye and Yao, Junfeng and Lee, Gim Hee}, title = {Learnable Infinite Taylor Gaussian for Dynamic View Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26844-26854} }
SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hongda and Wang, Longguang and Zhang, Ye and Yu, Ziru and Guo, Yulan}, title = {SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28468-28478} }
Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Yudong and Luo, Hao and Zhong, Zhiwei and Chen, Peilin and Zhang, Zhijiang and Wang, Shiqi}, title = {Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28039-28049} }
MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Pataki_2025_CVPR, author = {Pataki, Zador and Sarlin, Paul-Edouard and Sch{\"o}nberger, Johannes L. and Pollefeys, Marc}, title = {MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21891-21901} }
Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2025_CVPR, author = {Hua, Yu and Liu, Weiming and Xu, Gui and Hou, Yaqing and Ong, Yew-Soon and Zhang, Qiang}, title = {Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22724-22734} }
CacheQuant: Comprehensively Accelerated Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xuewen and Li, Zhikai and Gu, Qingyi}, title = {CacheQuant: Comprehensively Accelerated Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23269-23280} }
Open-World Objectness Modeling Unifies Novel Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shan and Ni, Yao and Du, Jinhao and Xue, Yuan and Torr, Philip and Koniusz, Piotr and van den Hengel, Anton}, title = {Open-World Objectness Modeling Unifies Novel Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30332-30342} }
MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Shenghao and Lu, Yi and Huang, Jiayi and Zhao, Jiayi and Zhang, He and Yu, Tao and Shen, Qiu and Cao, Xun}, title = {MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27760-27770} }
DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Mu and Li, Liulei and Wang, Wenguan and Yang, Yi}, title = {DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29161-29172} }
Towards Smart Point-and-Shoot Photography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiawan and Zhou, Fei and Zhong, Zhipeng and Lin, Jiongzhi and Qiu, Guoping}, title = {Towards Smart Point-and-Shoot Photography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28242-28251} }
Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Qingchen and Fan, Lei and Pagnucco, Maurice and Song, Yang}, title = {Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30271-30280} }
Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jiaming and Ma, Teli and Lin, Kun-Yu and Wang, Zifan and Qiu, Ronghe and Liang, Junwei}, title = {Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22551-22561} }
SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zehan and Zhou, Sashuai and He, Shaoxuan and Huang, Haifeng and Yang, Lihe and Zhang, Ziang and Cheng, Xize and Ji, Shengpeng and Jin, Tao and Zhao, Hengshuang and Zhao, Zhou}, title = {SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29656-29666} }
Mono2Stereo: A Benchmark and Empirical Study for Stereo Conversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Songsong and Chen, Yuxin and Qi, Zhongang and Xie, Zeke and Wang, Yifan and Wang, Lijun and Shan, Ying and Lu, Huchuan}, title = {Mono2Stereo: A Benchmark and Empirical Study for Stereo Conversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21847-21856} }
SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xinrui and Guo, Lanqing and Wang, Xiyu and Huang, Siyu and Wen, Bihan}, title = {SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23217-23226} }
VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding-
[pdf]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yujie and Hu, Xiaobin and Jiang, Boyuan and Luo, Donghao and Peng, Xu and Wu, Kai and Xu, Chengming and Han, Wenhui and Jin, Taisong and Wang, Chengjie and Ji, Rongrong}, title = {VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22616-22626} }
Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhifei and Xu, Tianshuo and Ge, Wenhang and Wu, Leyi and Yan, Dongyu and He, Jing and Wang, Luozhou and Zeng, Lu and Zhang, Shunsi and Chen, Ying-Cong}, title = {Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26504-26513} }
POSTA: A Go-to Framework for Customized Artistic Poster Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Haoyu and Xu, Xiaojie and Li, Wenbo and Ren, Jingjing and Ye, Tian and Liu, Songhua and Chen, Ying-Cong and Zhu, Lei and Wang, Xinchao}, title = {POSTA: A Go-to Framework for Customized Artistic Poster Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28694-28704} }
NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery-
[pdf]
[supp]
[bibtex]@InProceedings{Kneeland_2025_CVPR, author = {Kneeland, Reese and Scotti, Paul S. and St-Yves, Ghislain and Breedlove, Jesse and Kay, Kendrick and Naselaris, Thomas}, title = {NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28852-28862} }
VLsI: Verbalized Layers-to-Interactions from Large to Small Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Byung-Kwan and Hachiuma, Ryo and Wang, Yu-Chiang Frank and Ro, Yong Man and Wu, Yueh-Hua},
  title     = {{VLsI}: Verbalized Layers-to-Interactions from Large to Small Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29545--29557},
}
Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majhi_2025_CVPR,
  author    = {Majhi, Snehashis and D'Amicantonio, Giacomo and Dantcheva, Antitza and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bondarev, Egor and Bremond, Francois},
  title     = {Just Dance with pi! {A} Poly-modal Inductor for Weakly-supervised Video Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24265--24274},
}
Efficient Motion-Aware Video MLLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zijia and Huo, Yuqi and Yue, Tongtian and Guo, Longteng and Lu, Haoyu and Wang, Bingning and Chen, Weipeng and Liu, Jing},
  title     = {Efficient Motion-Aware Video {MLLM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24159--24168},
}
Zero-Shot 4D Lidar Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yushan and O{\v{s}}ep, Aljo{\v{s}}a and Leal-Taix{\'e}, Laura and Meinhardt, Tim},
  title     = {Zero-Shot {4D} Lidar Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24506--24517},
}
ADU: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yushan and Li, Guowen and Liang, Haoyuan and Zheng, Juepeng and Ye, Zhiyu},
  title     = {{ADU}: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30588--30598},
}
EmotiveTalk: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haotian and Weng, Yuzhe and Li, Yueyan and Guo, Zilu and Du, Jun and Niu, Shutong and Ma, Jiefeng and He, Shan and Wu, Xiaoyan and Hu, Qiming and Yin, Bing and Liu, Cong and Liu, Qingfeng},
  title     = {{EmotiveTalk}: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26212--26221},
}
Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lenz_2025_CVPR,
  author    = {Lenz, Tim and Neidlinger, Peter and Ligero, Marta and W{\"o}lflein, Georg and van Treeck, Marko and Kather, Jakob N.},
  title     = {Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30807--30817},
}
UNIALIGN: Scaling Multimodal Alignment within One Unified Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Bo and Li, Liulei and Wang, Yujia and Liu, Huafeng and Yao, Yazhou and Wang, Wenguan},
  title     = {{UNIALIGN}: Scaling Multimodal Alignment within One Unified Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29644--29655},
}
ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{Soucek_2025_CVPR,
  author    = {Sou{\v{c}}ek, Tom{\'a}{\v{s}} and Gatti, Prajwal and Wray, Michael and Laptev, Ivan and Damen, Dima and Sivic, Josef},
  title     = {{ShowHowTo}: Generating Scene-Conditioned Step-by-Step Visual Instructions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27435--27445},
}
Exploration-Driven Generative Interactive Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Savov_2025_CVPR,
  author    = {Savov, Nedko and Kazemi, Naser and Mahdi, Mohammad and Paudel, Danda Pani and Wang, Xi and Van Gool, Luc},
  title     = {Exploration-Driven Generative Interactive Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27597--27607},
}
DreamText: High Fidelity Scene Text Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yibin and Zhang, Weizhong and Xu, Honghui and Jin, Cheng},
  title     = {{DreamText}: High Fidelity Scene Text Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28555--28563},
}
ProKeR: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bendou_2025_CVPR,
  author    = {Bendou, Yassir and Ouasfi, Amine and Gripon, Vincent and Boukhayma, Adnane},
  title     = {{ProKeR}: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25092--25102},
}
MonoTAKD: Teaching Assistant Knowledge Distillation for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hou-I and Wu, Christine and Cheng, Jen-Hao and Chai, Wenhao and Wang, Shian-Yun and Liu, Gaowen and Latapie, Hugo and Wu, Jhih-Ciang and Hwang, Jenq-Neng and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {{MonoTAKD}: Teaching Assistant Knowledge Distillation for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22266--22275},
}
Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ye and Hu, Zhangli and Zhao, Zhongyin and Zhu, Yupeng and Shi, Yue and Xiong, Yuxuan and Ni, Bingbing},
  title     = {Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23345--23354},
}
Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Junyuan and Wu, Xinyi and Yang, Yongxing and Zhu, Congchao and Wang, Song and Wu, Zhenyao},
  title     = {Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23195--23206},
}
Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zhangqi and Chen, Junkai and Zhu, Beier and Luo, Tingjin and Shen, Yankun and Yang, Xu},
  title     = {Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25004--25014},
}
SpectroMotion: Dynamic 3D Reconstruction of Specular Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Cheng-De and Chang, Chen-Wei and Liu, Yi-Ruei and Lee, Jie-Ying and Huang, Jiun-Long and Tseng, Yu-Chee and Liu, Yu-Lun},
  title     = {{SpectroMotion}: Dynamic {3D} Reconstruction of Specular Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21328--21338},
}
VTON 360: High-Fidelity Virtual Try-On from Any Viewing Direction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR,
  author    = {He, Zijian and Ning, Yuwei and Qin, Yipeng and Wang, Guangrun and Yang, Sibei and Lin, Liang and Li, Guanbin},
  title     = {{VTON} 360: High-Fidelity Virtual Try-On from Any Viewing Direction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26388--26398},
}
MVBoost: Boost 3D Reconstruction with Multi-View Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiangyu and Zhang, Xiaomei and Ma, Zhiyuan and Zhu, Xiangyu and Lei, Zhen},
  title     = {{MVBoost}: Boost {3D} Reconstruction with Multi-View Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21664--21673},
}
Category-Agnostic Neural Object Rigging-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR,
  author    = {He, Guangzhao and Geng, Chen and Wu, Shangzhe and Wu, Jiajun},
  title     = {Category-Agnostic Neural Object Rigging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22078--22088},
}
POPEN: Preference-Based Optimization and Ensemble for LVLM-Based Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Xu, Qianxiong and Liu, Xuanyi and Ji, Deyi and Wu, Haiyang and Soh, De Wen and Liu, Jun},
  title     = {{POPEN}: Preference-Based Optimization and Ensemble for {LVLM}-Based Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30231--30240},
}
MMAudio: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Ho Kei and Ishii, Masato and Hayakawa, Akio and Shibuya, Takashi and Schwing, Alexander and Mitsufuji, Yuki},
  title     = {{MMAudio}: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28901--28911},
}
Mimic In-Context Learning for Multimodal Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuchu and Fu, Jiale and Hao, Chenduo and Hu, Xinting and Peng, Yingzhe and Geng, Xin and Yang, Xu},
  title     = {Mimic In-Context Learning for Multimodal Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29825--29835},
}
Vision-Language Models Do Not Understand Negation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alhamoud_2025_CVPR,
  author    = {Alhamoud, Kumail and Alshammari, Shaden and Tian, Yonglong and Li, Guohao and Torr, Philip H. S. and Kim, Yoon and Ghassemi, Marzyeh},
  title     = {Vision-Language Models Do Not Understand Negation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29612--29622},
}
NexusGS: Sparse View Synthesis with Epipolar Depth Priors in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yulong and Jiang, Zicheng and He, Shengfeng and Sun, Yandu and Dong, Junyu and Zhang, Huaidong and Du, Yong},
  title     = {{NexusGS}: Sparse View Synthesis with Epipolar Depth Priors in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26800--26809},
}
HyperNet Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hedlin_2025_CVPR,
  author    = {Hedlin, Eric and Hayat, Munawar and Porikli, Fatih and Yi, Kwang Moo and Mahajan, Shweta},
  title     = {{HyperNet} Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22129--22138},
}
RICCARDO: Radar Hit Prediction and Convolution for Camera-Radar 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_CVPR,
  author    = {Long, Yunfei and Kumar, Abhinav and Liu, Xiaoming and Morris, Daniel},
  title     = {{RICCARDO}: Radar Hit Prediction and Convolution for Camera-Radar {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22276--22285},
}
BLADE: Single-view Body Mesh Estimation through Accurate Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Jiefeng and Li, Tianye and Yuan, Ye and Fuchs, Henry and Nagano, Koki and De Mello, Shalini and Stengel, Michael},
  title     = {{BLADE}: Single-view Body Mesh Estimation through Accurate Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21991--22000},
}
MoEE: Mixture of Emotion Experts for Audio-Driven Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huaize and Sun, Wenzhang and Di, Donglin and Sun, Shibo and Yang, Jiahui and Zou, Changqing and Bao, Hujun},
  title     = {{MoEE}: Mixture of Emotion Experts for Audio-Driven Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26222--26231},
}
ReCap: Better Gaussian Relighting with Cross-Environment Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jingzhi and Wu, Zongwei and Zamfir, Eduard and Timofte, Radu},
  title     = {{ReCap}: Better {Gaussian} Relighting with Cross-Environment Captures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21307--21316},
}
Vision-Language Embodiment for Monocular Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinchang and Lu, Guoyu},
  title     = {Vision-Language Embodiment for Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29479--29489},
}
Frequency Dynamic Convolution for Dense Image Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Linwei and Gu, Lin and Li, Liang and Yan, Chenggang and Fu, Ying},
  title     = {Frequency Dynamic Convolution for Dense Image Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30178--30188},
}
IDEA: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhao and Lv, Yongfeng and Zhang, Pingping and Lu, Huchuan},
  title     = {{IDEA}: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29701--29710},
}
Consistency Posterior Sampling for Diverse Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Purohit_2025_CVPR,
  author    = {Purohit, Vishal and Repasky, Matthew and Lu, Jianfeng and Qiu, Qiang and Xie, Yao and Cheng, Xiuyuan},
  title     = {Consistency Posterior Sampling for Diverse Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28327--28336},
}
IMFine: 3D Inpainting via Geometry-guided Multi-view Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Zhihao and Huo, Dong and Zhou, Yuhongze and Min, Yan and Lu, Juwei and Zuo, Xinxin},
  title     = {{IMFine}: {3D} Inpainting via Geometry-guided Multi-view Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26694--26703},
}
DeepCompress-ViT: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge-
[pdf]
[bibtex]@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sabbir and Al Arafat, Abdullah and Najafi, Deniz and Mahmood, Akhlak and Rizve, Mamshad Nayeem and Al Nahian, Mohaiminul and Zhou, Ranyang and Angizi, Shaahin and Rakin, Adnan Siraj},
  title     = {{DeepCompress-ViT}: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30147--30156},
}
EvOcc: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory-
[pdf]
[supp]
[bibtex]@InProceedings{Kalble_2025_CVPR,
  author    = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy},
  title     = {{EvOcc}: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27467--27476},
}
Towards Continual Universal Segmentation-
[pdf]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zihan and Wang, Zilei and Wang, Xu},
  title     = {Towards Continual Universal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29417--29427},
}
PGC: Physics-Based Gaussian Cloth from a Single Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Michelle and Chiang, Matt Jen-Yuan and Santesteban, Igor and Sarafianos, Nikolaos and Chen, Hsiao-yu and Halimi, Oshri and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Saito, Shunsuke and Wu, Jiajun and Liu, C. Karen and Stuyck, Tuur and Larionov, Egor},
  title     = {{PGC}: Physics-Based {Gaussian} Cloth from a Single Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21215--21225},
}
OFER: Occluded Face Expression Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Selvaraju_2025_CVPR,
  author    = {Selvaraju, Pratheba and Abrevaya, Victoria Fernandez and Bolkart, Timo and Akkerman, Rick and Ding, Tianyu and Amjadi, Faezeh and Zharkov, Ilya},
  title     = {{OFER}: Occluded Face Expression Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26985--26995},
}
Cubify Anything: Scaling Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lazarow_2025_CVPR,
  author    = {Lazarow, Justin and Griffiths, David and Kohavi, Gefen and Crespo, Francisco and Dehghan, Afshin},
  title     = {Cubify Anything: Scaling Indoor {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22225--22233},
}
DEAL: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhu and Wang, Zijun and Liu, Jinyuan and Meng, Fanqi and Ma, Long and Liu, Risheng},
  title     = {{DEAL}: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28198--28207},
}
BimArt: A Unified Approach for the Synthesis of 3D Bimanual Interaction with Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wanyue and Dabral, Rishabh and Golyanik, Vladislav and Choutas, Vasileios and Alvarado, Eduardo and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{BimArt}: A Unified Approach for the Synthesis of {3D} Bimanual Interaction with Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27694--27705},
}
CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yiqi and Wang, Ziyue and Zhang, Can and Li, Peng and Liu, Yang},
  title     = {{CoSpace}: Benchmarking Continuous Space Perception Ability for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29569--29579},
}
FreeTimeGS: Free Gaussian Primitives at Anytime Anywhere for Dynamic Scene Reconstruction-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yifan and Yang, Peishan and Xu, Zhen and Sun, Jiaming and Zhang, Zhanhua and Chen, Yong and Bao, Hujun and Peng, Sida and Zhou, Xiaowei},
  title     = {{FreeTimeGS}: Free {Gaussian} Primitives at Anytime Anywhere for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21750--21760},
}
Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models-
[pdf]
[supp]
[bibtex]@InProceedings{Benou_2025_CVPR,
  author    = {Benou, Itay and Raviv, Tammy Riklin},
  title     = {Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30063--30072},
}
Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shengqiong and Fei, Hao and Yang, Jingkang and Li, Xiangtai and Li, Juncheng and Zhang, Hanwang and Chua, Tat-seng},
  title     = {Learning {4D} Panoptic Scene Graph Generation from Rich {2D} Visual Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24539--24549},
}
Knowledge Bridger: Towards Training-Free Missing Modality Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Guanzhou and He, Shengfeng and Wang, Xiaoli and Wang, Bo and Chao, Guoqing and Zhang, Yuanyang and Xie, Yi and Su, Hexing},
  title     = {Knowledge Bridger: Towards Training-Free Missing Modality Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25864--25873},
}
TexGarment: Consistent Garment UV Texture Generation via Efficient 3D Structure-Guided Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jialun and Wu, Jinbo and Gao, Xiaobo and Hu, Jiakui and Xiong, Bojun and Liu, Xing and Zhao, Chen and Pei, Hongbin and Feng, Haocheng and Li, Yingying and Ding, Errui and Wang, Jingdong},
  title     = {{TexGarment}: Consistent Garment {UV} Texture Generation via Efficient {3D} Structure-Guided Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26566--26575},
}
Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Shangquan and Ren, Wenqi and Zhou, Juxiang and Wang, Shu and Gan, Jianhou and Cao, Xiaochun},
  title     = {Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26114--26124},
}
TIDE: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2025_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {{TIDE}: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30210--30220},
}
VSNet: Focusing on the Linguistic Characteristics of Sign Language-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuhao and Chen, Xinyue and Li, Hongkai and Pu, Xiaorong and Jin, Peng and Ren, Yazhou},
  title     = {{VSNet}: Focusing on the Linguistic Characteristics of Sign Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24320--24330},
}
Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Taeyoung and Zhang, Dinghuai and Park, Jinkyoo and Pan, Ling},
  title     = {Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23625--23635},
}
Multi-modal Medical Diagnosis via Large-small Model Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wanyi and Zhao, Zihua and Yao, Jiangchao and Zhang, Ya and Bu, Jiajun and Wang, Haishuai},
  title     = {Multi-modal Medical Diagnosis via Large-small Model Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30763--30773},
}
Image Referenced Sketch Colorization Based on Animation Creation Workflow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Dingkun and Wang, Xinrui and Li, Zhuoru and Saito, Suguru and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
  title     = {Image Referenced Sketch Colorization Based on Animation Creation Workflow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23391--23400},
}
GaPT-DAR: Category-level Garments Pose Tracking via Integrated 2D Deformation and 3D Reconstruction-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Li and Xu, Mingliang and Wang, Jianan and Yu, Qiaojun and Yang, Lixin and Li, Yonglu and Lu, Cewu and Wang, Rujing and Liu, Liu},
  title     = {{GaPT-DAR}: Category-level Garments Pose Tracking via Integrated {2D} Deformation and {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22638--22647},
}
ProbPose: A Probabilistic Approach to 2D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Purkrabek_2025_CVPR,
  author    = {Purkrabek, Miroslav and Matas, Jiri},
  title     = {{ProbPose}: A Probabilistic Approach to {2D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27124--27133},
}
MIDI: Multi-Instance Diffusion for Single Image to 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zehuan and Guo, Yuan-Chen and An, Xingqiao and Yang, Yunhan and Li, Yangguang and Zou, Zi-Xin and Liang, Ding and Liu, Xihui and Cao, Yan-Pei and Sheng, Lu},
  title     = {{MIDI}: Multi-Instance Diffusion for Single Image to {3D} Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23646--23657},
}
ABC-Former: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance-
[pdf]
[supp]
[bibtex]@InProceedings{Chiu_2025_CVPR,
  author    = {Chiu, Yu-Cheng and Chen, Guan-Rong and Chen, Zihao and Peng, Yan-Tsung},
  title     = {{ABC-Former}: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21258--21266},
}
Fingerprinting Denoising Diffusion Probabilistic Models-
[pdf]
[supp]
[bibtex]@InProceedings{Teng_2025_CVPR,
  author    = {Teng, Huan and Quan, Yuhui and Wang, Chengyu and Huang, Jun and Ji, Hui},
  title     = {Fingerprinting Denoising Diffusion Probabilistic Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28811--28820},
}
NightAdapter: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Bi_2025_CVPR,
  author    = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Huang, Yawen and Li, Yuexiang and Wu, Xian and Zheng, Yefeng},
  title     = {{NightAdapter}: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23838--23849},
}
UMFN: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition-
[pdf]
[bibtex]@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Meng and Zhang, Wenjun and Zhou, Nanrun and Chen, Shengbo and Rao, Hong},
  title     = {{UMFN}: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29299--29308},
}
LUCAS: Layered Universal Codec Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Di and Deng, Teng and Nam, Giljoo and Rong, Yu and Pidhorskyi, Stanislav and Li, Junxuan and Saragih, Jason and Metaxas, Dimitris N. and Cao, Chen},
  title     = {{LUCAS}: Layered Universal Codec Avatars},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21127--21137},
}
D^3: Scaling Up Deepfake Detection by Learning from Discrepancy-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  title     = {D{\textasciicircum}3: Scaling Up Deepfake Detection by Learning from Discrepancy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23850--23859},
}
Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Yongli and Hong, Ziming and Yao, Lina and Wang, Dadong and Liu, Tongliang},
  title     = {Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30671--30681},
}
3D-GRAND: A Million-Scale Dataset for 3D-LLMs with Better Grounding and Less Hallucination-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianing and Chen, Xuweiyi and Madaan, Nikhil and Iyengar, Madhavan and Qian, Shengyi and Fouhey, David F. and Chai, Joyce},
  title     = {{3D-GRAND}: A Million-Scale Dataset for {3D-LLMs} with Better Grounding and Less Hallucination},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29501--29512},
}
Generative Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lan and Ao, Wei and Boddeti, Vishnu Naresh and Lim, Ser-Nam},
  title     = {Generative Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29690--29700},
}
Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Zijing and Zhang, Fengda and Chen, Long and Kuang, Kun and Li, Jiahui and Gao, Kaifeng and Xiao, Jun and Wang, Xin and Zhu, Wenwu},
  title     = {Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23604--23614},
}
Spatial457: A Diagnostic Benchmark for 6D Spatial Reasoning of Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xingrui and Ma, Wufei and Zhang, Tiezheng and de Melo, Celso M. and Chen, Jieneng and Yuille, Alan},
  title     = {Spatial457: A Diagnostic Benchmark for {6D} Spatial Reasoning of Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24669--24679},
}
Omnidirectional Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Kai and Shi, Hao and Wu, Sheng and Teng, Fei and Duan, Mengfei and Huang, Chang and Wang, Yuhang and Wang, Kaiwei and Yang, Kailun},
  title     = {Omnidirectional Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21959--21969},
}
Potential Field Based Deep Metric Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhatnagar_2025_CVPR,
  author    = {Bhatnagar, Shubhang and Ahuja, Narendra},
  title     = {Potential Field Based Deep Metric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25549--25559},
}
Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Haoxin and Li, Boyang},
  title     = {Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24849--24861},
}
Directional Label Diffusion Model for Learning from Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Senyu and Jiang, Gaoxia and Zhang, Jia and Yang, Shangrong and Guo, Husheng and Guo, Yaqing and Wang, Wenjian},
  title     = {Directional Label Diffusion Model for Learning from Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25738--25748},
}
Learning Endogenous Attention for Incremental Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Xiang and He, Yuhang and Li, Jingyuan and Wang, Qiang and Gong, Yihong},
  title     = {Learning Endogenous Attention for Incremental Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30354--30364},
}
StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2025_CVPR,
  author    = {Zhai, Shangjin and Ye, Zhichao and Liu, Jialin and Xie, Weijian and Hu, Jiaqi and Peng, Zhen and Xue, Hua and Chen, Danpeng and Wang, Xiaomeng and Yang, Lei and Wang, Nan and Liu, Haomin and Zhang, Guofeng},
  title     = {{StarGen}: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26822--26833},
}
HomoGen: Enhanced Video Inpainting via Homography Propagation and Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Ding and Pan, Yueming and Feng, Ruoyu and Dai, Qi and Qiu, Kai and Bao, Jianmin and Luo, Chong and Chen, Zhenzhong},
  title     = {{HomoGen}: Enhanced Video Inpainting via Homography Propagation and Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22953--22962},
}
Do ImageNet-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shunxin and Veldhuis, Raymond and Strisciuglio, Nicola},
  title     = {Do {ImageNet}-trained Models Learn Shortcuts? {The} Impact of Frequency Shortcuts on Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25198--25207},
}
HORP: Human-Object Relation Priors Guided HOI Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Pei and Yang, Jian and Zhang, Shanshan},
  title     = {{HORP}: Human-Object Relation Priors Guided {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25325--25335},
}
Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zeyi and Ji, Yuyang and Wang, Xiaofang and Mehta, Nikhil and Xiao, Tong and Lee, Donghyun and Vanvalkenburgh, Sigmund and Zha, Shengxin and Lai, Bolin and Yu, Licheng and Zhang, Ning and Lee, Yong Jae and Liu, Miao},
  title     = {Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24169--24179},
}
Back