Papers
- Back
DynScene: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied AI-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sangmin and Park, Sungyong and Kim, Heewon},
  title     = {{DynScene}: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12166--12175},
}
DiffLocks: Generating 3D Hair from a Single Image using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosu_2025_CVPR,
  author    = {Rosu, Radu Alexandru and Wu, Keyu and Feng, Yao and Zheng, Youyi and Black, Michael J.},
  title     = {{DiffLocks}: Generating {3D} Hair from a Single Image using Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10847--10857},
}
Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenguang and Shuai, Chao and Fan, Shaojing and Dong, Ziping and Hu, Jinwu and Ba, Zhongjie and Ren, Kui},
  title     = {Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18653--18662},
}
IDEA-Bench: How Far are Generative Models from Professional Designing?-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Chen and Huang, Lianghua and Fang, Jingwu and Dou, Huanzhang and Wang, Wei and Wu, Zhi-Fan and Shi, Yupeng and Zhang, Junge and Zhao, Xin and Liu, Yu},
  title     = {{IDEA-Bench}: How Far are Generative Models from Professional Designing?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18541--18551},
}
PhD: A ChatGPT-Prompted Visual Hallucination Evaluation Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiazhen and Fu, Yuhan and Xie, Ruobing and Xie, Runquan and Sun, Xingwu and Lian, Fengzong and Kang, Zhanhui and Li, Xirong},
  title     = {{PhD}: A {ChatGPT}-Prompted Visual Hallucination Evaluation Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19857--19866},
}
ClimbingCap: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ming and Lin, Xincheng and Luo, Yuhua and Fan, Shuqi and Dai, Yudi and Zhong, Qixin and Zhong, Lincai and Ma, Yuexin and Xu, Lan and Wen, Chenglu and Shen, Siqi and Wang, Cheng},
  title     = {{ClimbingCap}: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12312--12323},
}
A Bias-Free Training Paradigm for More General AI-generated Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guillaro_2025_CVPR,
  author    = {Guillaro, Fabrizio and Zingarini, Giada and Usman, Ben and Sud, Avneesh and Cozzolino, Davide and Verdoliva, Luisa},
  title     = {A Bias-Free Training Paradigm for More General {AI}-generated Image Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18685--18694},
}
FALCON: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Truong_2025_CVPR,
  author    = {Truong, Thanh-Dat and Prabhu, Utsav and Raj, Bhiksha and Cothren, Jackson and Luu, Khoa},
  title     = {{FALCON}: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15065--15075},
}
Certified Human Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahari_2025_CVPR,
  author    = {Bahari, Mohammadhossein and Saadatnejad, Saeed and Farsangi, Amirhossein Askari and Moosavi-Dezfooli, Seyed-Mohsen and Alahi, Alexandre},
  title     = {Certified Human Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12301--12311},
}
Transformers without Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jiachen and Chen, Xinlei and He, Kaiming and LeCun, Yann and Liu, Zhuang},
  title     = {Transformers without Normalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14901--14911},
}
HiPART: Hierarchical Pose AutoRegressive Transformer for Occluded 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hongwei and Li, Han and Dai, Wenrui and Zheng, Ziyang and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {{HiPART}: Hierarchical Pose {AutoRegressive} Transformer for Occluded {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16807--16817},
}
From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ji-Hoon and Choi, Jeongsoo and Kim, Jaehun and Jung, Chaeyoung and Chung, Joon Son},
  title     = {From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15874--15884},
}
DFM: Differentiable Feature Matching for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sheng and Wang, Yimi and Liu, Xudong and Yang, Yuguang and Wang, Runqi and Guo, Guodong and Doermann, David and Zhang, Baochang},
  title     = {{DFM}: Differentiable Feature Matching for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15224--15233},
}
PointSR: Self-Regularized Point Supervision for Drone-View Object Detection-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Weizhuo and Xi, Yue and Jia, Wenjing and Zhang, Zehao and Li, Fei and Liu, Xiangzeng and Miao, Qiguang},
  title     = {{PointSR}: Self-Regularized Point Supervision for Drone-View Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11707--11716},
}
v-CLR: View-Consistent Learning for Open-World Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Ni, Jinhong and Zhong, Yujie and Han, Kai},
  title     = {{v-CLR}: View-Consistent Learning for Open-World Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20307--20317},
}
Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Siyan and Wang, Shuzhe and Liu, Shaohui and Cai, Lulu and Fan, Qingnan and Kannala, Juho and Yang, Yanchao},
  title     = {{Reloc3r}: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16739--16752},
}
Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chengyue and Chen, Xiaokang and Wu, Zhiyu and Ma, Yiyang and Liu, Xingchao and Pan, Zizheng and Liu, Wen and Xie, Zhenda and Yu, Xingkai and Ruan, Chong and Luo, Ping},
  title     = {{Janus}: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12966--12977},
}
MagicArticulate: Make Your 3D Models Articulation-Ready-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Chaoyue and Zhang, Jianfeng and Li, Xiu and Yang, Fan and Chen, Yiwen and Xu, Zhongcong and Liew, Jun Hao and Guo, Xiaoyang and Liu, Fayao and Feng, Jiashi and Lin, Guosheng},
  title     = {{MagicArticulate}: Make Your {3D} Models Articulation-Ready},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15998--16007},
}
Dual Prompting Image Restoration with Diffusion Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Dehong and Li, Fan and Wang, Zhixin and Xu, Jiaqi and Pei, Renjing and Li, Wenbo and Ren, WenQi},
  title     = {Dual Prompting Image Restoration with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12809--12819},
}
DepthCues: Evaluating Monocular Depth Perception in Large Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Danier_2025_CVPR,
  author    = {Danier, Duolikun and Ayg{\"u}n, Mehmet and Li, Changjian and Bilen, Hakan and Mac Aodha, Oisin},
  title     = {{DepthCues}: Evaluating Monocular Depth Perception in Large Vision Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20049--20059},
}
SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiajun and Fei, Fan and Li, Zhihao and Tang, Xiao and Liu, Shiyong and Chen, Youyu and Huang, Binxiao and Chen, Zhenyu and Wu, Xiaofei and Shi, Boxin},
  title     = {{SpecTRe-GS}: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16133--16142},
}
AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360° Unbounded Scene Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chung-Ho and Chen, Yang-Jung and Chen, Ying-Huan and Lee, Jie-Ying and Ke, Bo-Hsu and Mu, Chun-Wei Tuan and Huang, Yi-Chuan and Lin, Chin-Yang and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun},
  title     = {{AuraFusion360}: Augmented Unseen Region Alignment for Reference-based {$360^\circ$} Unbounded Scene Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16366--16376},
}
Language-Guided Image Tokenization for Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Kaiwen and Yu, Lijun and Fathi, Alireza and Ross, David A. and Schmid, Cordelia and Katabi, Dina and Gu, Xiuye},
  title     = {Language-Guided Image Tokenization for Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15713--15722},
}
Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sur_2025_CVPR,
  author    = {Sur, Tanuj and Mukherjee, Samrat and Rahaman, Kaizer and Chaudhuri, Subhasis and Khan, Muhammad Haris and Banerjee, Biplab},
  title     = {Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11810--11821},
}
D^3-Human: Dynamic Disentangled Digital Human from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
  title     = {{D$^3$-Human}: Dynamic Disentangled Digital Human from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10836--10846},
}
Curriculum Coarse-to-Fine Selection for High-IPC Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yanda and Chen, Gongwei and Zhang, Miao and Guan, Weili and Nie, Liqiang},
  title     = {Curriculum Coarse-to-Fine Selection for High-{IPC} Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20437--20446},
}
BADGR: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuguang and Boyadzhiev, Ivaylo and Liu, Zixuan and Shapiro, Linda and Colburn, Alex},
  title     = {{BADGR}: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16785--16795},
}
Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Jongseong and Ha, Junwoo and Kim, Ha Young},
  title     = {Three Cars Approaching within 100m! {Enhancing} Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11939--11948},
}
DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xiaofu and Luo, Yaxin and Luo, Gen and Ji, Jiayi and Ding, Henghui and Zhou, Yiyi},
  title     = {{DViN}: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14347--14357},
}
Spiking Transformer with Spatial-Temporal Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Donghyun and Li, Yuhang and Kim, Youngeun and Xiao, Shiting and Panda, Priyadarshini},
  title     = {Spiking Transformer with Spatial-Temporal Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13948--13958},
}
Perceptual Video Compression with Neural Wrapping-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2025_CVPR,
  author    = {Khan, Muhammad Umar Karim and Chadha, Aaron and Anam, Mohammad Ashraful and Andreopoulos, Yiannis},
  title     = {Perceptual Video Compression with Neural Wrapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17743--17754},
}
ViKIENet: Towards Efficient 3D Object Detection with Virtual Key Instance Enhanced Network-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhuochen and Qiu, Bijie and Khong, Andy W. H.},
  title     = {{ViKIENet}: Towards Efficient {3D} Object Detection with Virtual Key Instance Enhanced Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11844--11853},
}
Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Chanhui and Song, Yeonghwan and Son, Jeany},
  title     = {Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13907--13916},
}
FRAME: Floor-aligned Representation for Avatar Motion from Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Camiletto_2025_CVPR,
  author    = {Camiletto, Andrea Boscolo and Wang, Jian and Alvarado, Eduardo and Dabral, Rishabh and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{FRAME}: Floor-aligned Representation for Avatar Motion from Egocentric Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17497--17507},
}
Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Libiao and Nie, Dong and Pan, Junjun and Yan, Jing and Tang, Zhenyu},
  title     = {Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20286--20295},
}
Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural ODEs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sijie and She, Rui and Kang, Qiyu and Li, Siqi and Li, Disheng and Geng, Tianyu and Yu, Shangshu and Tay, Wee Peng},
  title     = {Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural {ODEs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11717--11728},
}
MaDCoW: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kevin and Huang, Jia-Bin and Echevarria, Jose and DiVerdi, Stephen and Hertzmann, Aaron},
  title     = {{MaDCoW}: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10923--10932},
}
Any6D: Model-free 6D Pose Estimation of Novel Objects-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Taeyeop and Wen, Bowen and Kang, Minjun and Kang, Gyuree and Kweon, In So and Yoon, Kuk-Jin},
  title     = {{Any6D}: Model-free {6D} Pose Estimation of Novel Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11633--11643},
}
DrVideo: Document Retrieval Based Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Ziyu and Gou, Chenhui and Shi, Hengcan and Sun, Bin and Li, Shutao and Rezatofighi, Hamid and Cai, Jianfei},
  title     = {{DrVideo}: Document Retrieval Based Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18936--18946},
}
Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2025_CVPR,
  author    = {Kuang, Zhengfei and Zhang, Tianyuan and Zhang, Kai and Tan, Hao and Bi, Sai and Hu, Yiwei and Xu, Zexiang and Hasan, Milos and Wetzstein, Gordon and Luan, Fujun},
  title     = {Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17660--17670},
}
PSHuman: Photorealistic Single-image 3D Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Peng and Zheng, Wangguandong and Liu, Yuan and Yu, Tao and Li, Yangguang and Qi, Xingqun and Chi, Xiaowei and Xia, Siyu and Cao, Yan-Pei and Xue, Wei and Luo, Wenhan and Guo, Yike},
  title     = {{PSHuman}: Photorealistic Single-image {3D} Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16008--16018},
}
Hiding Images in Diffusion Models by Editing Learned Score Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoyu and Yang, Yunqiao and Zhong, Nan and Ma, Kede},
  title     = {Hiding Images in Diffusion Models by Editing Learned Score Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18663--18673},
}
WeatherGen: A Unified Diverse Weather Generator for LiDAR Point Clouds via Spider Mamba Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yang and Zhu, Yun and Zhang, Kaihua and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {{WeatherGen}: A Unified Diverse Weather Generator for {LiDAR} Point Clouds via Spider {Mamba} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17019--17028},
}
MUST: The First Dataset and Unified Framework for Multispectral UAV Single Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Haolin and Xu, Tingfa and Li, Tianhao and Chen, Zhenxiang and Feng, Tao and Li, Jianan},
  title     = {{MUST}: The First Dataset and Unified Framework for Multispectral {UAV} Single Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16882--16891},
}
Tightening Robustness Verification of MaxPool-based Neural Networks via Minimizing the Over-Approximation Zone-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yuan and Chen, Yuchen and Ma, Shiqing and Fang, Chunrong and Bai, Tongtong and Gu, Mingzheng and Cheng, Yuxin and Chen, Yanwei and Chen, Zhenyu},
  title     = {Tightening Robustness Verification of {MaxPool}-based Neural Networks via Minimizing the Over-Approximation Zone},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20695--20705},
}
PhysicsGen: Can Generative Models Learn from Images to Predict Complex Physical Relations?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spitznagel_2025_CVPR,
  author    = {Spitznagel, Martin and Vaillant, Jan and Keuper, Janis},
  title     = {{PhysicsGen}: Can Generative Models Learn from Images to Predict Complex Physical Relations?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11125--11134},
}
Spectral Informed Mamba for Robust Point Cloud Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahri_2025_CVPR,
  author    = {Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Dastani, Sahar and Cheraghalikhani, Milad and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Beizaee, Farzad and Ben Ayed, Ismail and Desrosiers, Christian},
  title     = {Spectral Informed {Mamba} for Robust Point Cloud Processing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11799--11809},
}
BlobGEN-Vid: Compositional Text-to-Video Generation with Blob Video Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Weixi and Liu, Chao and Liu, Sifei and Wang, William Yang and Vahdat, Arash and Nie, Weili},
  title     = {{BlobGEN-Vid}: Compositional Text-to-Video Generation with Blob Video Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12989--12998},
}
D2SP: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition.-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoran and Mai, Xinji and Tao, Zeng and Tong, Xuan and Lin, Junxiong and Wang, Yan and Yu, Jiawen and Yan, Shaoqi and Zhou, Ziheng and Zhang, Wenqiang},
  title     = {{D2SP}: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19218--19229},
}
LaVin-DiT: Large Vision Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhaoqing and Xia, Xiaobo and Chen, Runnan and Yu, Dongdong and Wang, Changhu and Gong, Mingming and Liu, Tongliang},
  title     = {{LaVin-DiT}: Large Vision Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20060--20070},
}
CAP-Net: A Unified Network for 6D Pose and Size Estimation of Categorical Articulated Parts from a Single RGB-D Image-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jingshun and Lin, Haitao and Wang, Tianyu and Fu, Yanwei and Xue, Xiangyang and Zhu, Yi},
  title     = {{CAP-Net}: A Unified Network for {6D} Pose and Size Estimation of Categorical Articulated Parts from a Single {RGB-D} Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11654--11664},
}
Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Zhanhao and Yuan, Yuhui and Gu, Shuyang and Chen, Bohan and Hang, Tiankai and Cheng, Mingxi and Li, Ji and Zheng, Liang},
  title     = {Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13199--13208},
}
BARD-GS: Blur-Aware Reconstruction of Dynamic Scenes via Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yiren and Zhou, Yunlai and Liu, Disheng and Liang, Tuo and Yin, Yu},
  title     = {{BARD-GS}: Blur-Aware Reconstruction of Dynamic Scenes via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16532--16542},
}
DiN: Diffusion Model for Robust Medical VQA with Semantic Noisy Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Erjian and Zhao, Zhen and Wang, Zicheng and Chen, Tong and Liu, Yunyi and Zhou, Luping},
  title     = {{DiN}: Diffusion Model for Robust Medical {VQA} with Semantic Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14337--14346},
}
S^3-Face: SSS-Compliant Facial Reflectance Estimation via Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Zhu, Wenhan and Yan, Yichao and Yang, Xiaokang and Zafeiriou, Stefanos and Ma, Chao},
  title     = {{S$^3$-Face}: {SSS}-Compliant Facial Reflectance Estimation via Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16051--16060},
}
FSBench: A Figure Skating Benchmark for Advancing Artistic Sports Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Rong and Liu, Xin and Hu, Zhuozhao and Xing, Bohao and Xia, Baiqiang and Yu, Zitong and K{\"a}lvi{\"a}inen, Heikki},
  title     = {{FSBench}: A Figure Skating Benchmark for Advancing Artistic Sports Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13595--13605},
}
Keep the Balance: A Parameter-Efficient Symmetrical Framework for RGB+X Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Jiaxin and Su, Jingze and Li, Qi and Yang, Wenjie and Wang, Shu and Zhao, Tiesong and He, Shengfeng and Liu, Wenxi},
  title     = {Keep the Balance: A Parameter-Efficient Symmetrical Framework for {RGB+X} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10587--10598},
}
LLM-driven Multimodal and Multi-Identity Listening Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Peiwen and Zhong, Weizhi and Qin, Yipeng and Ren, Xiaohang and Wang, Baoyuan and Li, Guanbin},
  title     = {{LLM}-driven Multimodal and Multi-Identity Listening Head Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10656--10666},
}
OffsetOPT: Explicit Surface Reconstruction without Normals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Huan},
  title     = {{OffsetOPT}: Explicit Surface Reconstruction without Normals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11729--11738},
}
Any-Resolution AI-Generated Image Detection by Spectral Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karageorgiou_2025_CVPR,
  author    = {Karageorgiou, Dimitrios and Papadopoulos, Symeon and Kompatsiaris, Ioannis and Gavves, Efstratios},
  title     = {Any-Resolution {AI}-Generated Image Detection by Spectral Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18706--18717},
}
STOP: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zichen and Xu, Kunlun and Su, Bing and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{STOP}: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13776--13786},
}
TimeTracker: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haoyue and Xu, Jinghan and Chang, Yi and Zhou, Hanyu and Zhao, Haozhi and Wang, Lin and Yan, Luxin},
  title     = {{TimeTracker}: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17649--17659},
}
Shading Meets Motion: Self-supervised Indoor 3D Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion-
[pdf]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Guoyu},
  title     = {Shading Meets Motion: Self-supervised Indoor {3D} Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16508--16519},
}
Believing is Seeing: Unobserved Object Detection using Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhattacharjee_2025_CVPR,
  author    = {Bhattacharjee, Subhransu S. and Campbell, Dylan and Shome, Rahul},
  title     = {Believing is Seeing: Unobserved Object Detection using Generative Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19366--19377},
}
NLPrompt: Noise-Label Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Bikang and Li, Qun and Tang, Xiaoying and Huang, Wei and Fang, Zhen and Liu, Feng and Wang, Jingya and Yu, Jingyi and Shi, Ye},
  title     = {{NLPrompt}: Noise-Label Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19963--19973},
}
PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Sean and Basu, Shamik and Broedermann, Tim and Van Gool, Luc and Sakaridis, Christos}, title = {PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10974-10984} }
No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition-
[pdf]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Rong and Liu, Xin and Liu, Xingyu and Liu, Jiaxuan and Shi, Jinglei and Lin, Liang and Yang, Jufeng}, title = {No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14965-14975} }
ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hao and Si, Guangzong and Wang, Zilei}, title = {ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14625-14634} }
TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yiran and Li, Jiaqi and Hong, Chaoyi and Li, Ruibo and Sun, Liusheng and Song, Xiao and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng}, title = {TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10523-10533} }
Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taketsugu_2025_CVPR, author = {Taketsugu, Hiromu and Oba, Takeru and Maeda, Takahiro and Nobuhara, Shohei and Ukita, Norimichi}, title = {Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12324-12334} }
AvatarArtist: Open-Domain 4D Avatarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hongyu and Wang, Xuan and Wan, Ziyu and Ma, Yue and Chen, Jingye and Fan, Yanbo and Shen, Yujun and Song, Yibing and Chen, Qifeng}, title = {AvatarArtist: Open-Domain 4D Avatarization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10758-10769} }
Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Chen and Shen, Yan and Li, Dan and Wang, Zhongli}, title = {Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18000-18010} }
UniGoal: Towards Universal Zero-shot Goal-oriented Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hang and Xu, Xiuwei and Zhao, Linqing and Wang, Ziwei and Zhou, Jie and Lu, Jiwen}, title = {UniGoal: Towards Universal Zero-shot Goal-oriented Navigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19057-19066} }
Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Kunpeng and Gao, Zhiqiang and Zhou, Zhiying and Sun, Mingjie and Guo, Yongxin}, title = {Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15672-15681} }
DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Jaewoo and Park, Daemin and Baek, Kanghyun and Lee, Sangyub and Choi, Jooyoung and Kim, Eunji and Yoon, Sungroh}, title = {DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18718-18727} }
Less is More: Efficient Image Vectorization with Adaptive Parameterization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Kaibo and Bao, Liang and Li, Yufei and Su, Xu and Zhang, Ke and Qiao, Xiaotian}, title = {Less is More: Efficient Image Vectorization with Adaptive Parameterization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18166-18175} }
FedMIA: An Effective Membership Inference Attack Exploiting "All for One" Principle in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Gongxi and Li, Donghao and Gu, Hanlin and Yao, Yuan and Fan, Lixin and Han, Yuxing}, title = {FedMIA: An Effective Membership Inference Attack Exploiting ''All for One'' Principle in Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20643-20653} }
DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morimitsu_2025_CVPR, author = {Morimitsu, Henrique and Zhu, Xiaobin and Cesar, Roberto M. and Ji, Xiangyang and Yin, Xu-Cheng}, title = {DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17810-17820} }
DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiao-Hui and Yin, Fei and Liu, Cheng-Lin}, title = {DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15021-15032} }
Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hyung_2025_CVPR, author = {Hyung, Junha and Kim, Kinam and Hong, Susung and Kim, Min-Jung and Choo, Jaegul}, title = {Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11006-11015} }
ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Yahan and Hu, Rui and Sang, Jitao}, title = {ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19836-19845} }
Masking meets Supervision: A Strong Learning Alliance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2025_CVPR, author = {Heo, Byeongho and Kim, Taekyung and Yun, Sangdoo and Han, Dongyoon}, title = {Masking meets Supervision: A Strong Learning Alliance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20447-20457} }
DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Wang and Cao, Yan-Pei and Xu, Jiale and Dong, Yuejiang and Shan, Ying}, title = {DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11061-11072} }
Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Wenlong and Wu, Qiaofeng and Chen, Jing and Xue, Yun}, title = {Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19597-19607} }
UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, I-Hsiang and Chen, Wei-Ting and Liu, Yu-Wei and Chiang, Yuan-Chun and Kuo, Sy-Yen and Yang, Ming-Hsuan}, title = {UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17969-17979} }
Condensing Action Segmentation Datasets via Generative Network Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Guodong and Chen, Rongyu and Yao, Angela}, title = {Condensing Action Segmentation Datasets via Generative Network Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17733-17742} }
Can Generative Video Models Help Pose Estimation?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Ruojin and Zhang, Jason Y. and Henzler, Philipp and Li, Zhengqi and Snavely, Noah and Martin-Brualla, Ricardo}, title = {Can Generative Video Models Help Pose Estimation?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16764-16773} }
DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhenhua and Bai, Yan and Zhang, Yujia and Li, Zhuoling and Xia, Fei and Wong, Kwan-Yee K. and Wang, Jianqiang and Zhao, Hengshuang}, title = {DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17261-17270} }
High-Fidelity Lightweight Mesh Reconstruction from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chen and Wang, Wentao and Li, Ximeng and Liao, Xinyao and Su, Wanjuan and Tao, Wenbing}, title = {High-Fidelity Lightweight Mesh Reconstruction from Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11739-11748} }
MDP: Multidimensional Vision Model Pruning with Latency Constraint-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Xinglong and Lakshmanan, Barath and Shen, Maying and Lan, Shiyi and Chen, Jingde and Alvarez, Jose M.}, title = {MDP: Multidimensional Vision Model Pruning with Latency Constraint}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20113-20123} }
OSDFace: One-Step Diffusion Model for Face Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jingkai and Gong, Jue and Zhang, Lin and Chen, Zheng and Liu, Xing and Gu, Hong and Liu, Yutong and Zhang, Yulun and Yang, Xiaokang}, title = {OSDFace: One-Step Diffusion Model for Face Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12626-12636} }
Task Singular Vectors: Reducing Task Interference in Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gargiulo_2025_CVPR, author = {Gargiulo, Antonio Andrea and Crisostomi, Donato and Bucarelli, Maria Sofia and Scardapane, Simone and Silvestri, Fabrizio and Rodol\`a, Emanuele}, title = {Task Singular Vectors: Reducing Task Interference in Model Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18695-18705} }
Self-Evolving Visual Concept Library using Vision-Language Critics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sehgal_2025_CVPR, author = {Sehgal, Atharva and Yuan, Patrick and Hu, Ziniu and Yue, Yisong and Sun, Jennifer J. and Chaudhuri, Swarat}, title = {Self-Evolving Visual Concept Library using Vision-Language Critics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13124-13134} }
Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mengnan and Wang, Le and Zhou, Sanping and Xia, Kun and Sun, Xiaolong and Hua, Gang}, title = {Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13865-13875} }
Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yi and Li, Wengen and Guan, Jihong and Zhou, Shuigeng and Zhang, Yichao}, title = {Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17851-17861} }
OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Benquan and An, Ruyi and So, Jin-Kyu and Kurdiumov, Sergei and Chan, Eng Aik and Adamo, Giorgio and Peng, Yuhan and Li, Yewen and An, Bo}, title = {OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10900-10912} }
Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yidi and Xiao, Jun and Lu, Zhengda and Wang, Yiqun and Jiang, Haiyong}, title = {Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18531-18540} }
DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Xin and Rawat, Yogesh S}, title = {DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13980-13989} }
HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferens_2025_CVPR, author = {Ferens, Ron and Keller, Yosi}, title = {HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11547-11557} }
Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking-
[pdf]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Hongkai and Yang, Yang and Sun, Shijie and Feng, Mingtao and Song, Xiangyu and Lei, Qi and Hu, Hongli and Wang, Rong and Song, Huansheng and Akhtar, Naveed and Mian, Ajmal Saeed}, title = {Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13886-13896} }
Towards Universal Dataset Distillation via Task-Driven Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Ding and Li, Jian and Gao, Junyao and Dou, Shuguang and Tai, Ying and Hu, Jianlong and Zhao, Bo and Wang, Yabiao and Wang, Chengjie and Zhao, Cairong}, title = {Towards Universal Dataset Distillation via Task-Driven Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10557-10566} }
Parametric Point Cloud Completion for Polygonal Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhaiyu and Wang, Yuqing and Nan, Liangliang and Zhu, Xiao Xiang}, title = {Parametric Point Cloud Completion for Polygonal Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11749-11758} }
SyncSDE: A Probabilistic Framework for Diffusion Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Hyunjun and Lee, Hyunsoo and Han, Sookwan}, title = {SyncSDE: A Probabilistic Framework for Diffusion Synchronization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17508-17517} }
MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Mingcheng and Hou, Xiaolu and Liu, Ziyang and Yang, Dingkang and Qian, Ziyun and Chen, Jiawei and Wei, Jinjie and Jiang, Yue and Xu, Qingyao and Zhang, Lihua}, title = {MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13263-13272} }
Dual Semantic Guidance for Open Vocabulary Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhengyang and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Feng, Wei and Wan, Liang}, title = {Dual Semantic Guidance for Open Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20212-20222} }
Generalizable Object Keypoint Localization from Generative Priors-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Dongkai and Duan, Jiang and Wen, Liangjian and Xuan, Shiyu and Chen, Hao and Zhang, Shiliang}, title = {Generalizable Object Keypoint Localization from Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20265-20274} }
FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Hao and Hu, Zhigang and Yang, Liu and Zheng, Meiguang and Xu, Aikun and Wang, Boyu}, title = {FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15444-15453} }
CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Zhiyu and Miao, Yunqi and Zhang, Zhensong and Song, Jifei and Deng, Jiankang and Song, Yi-Zhe}, title = {CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10815-10824} }
FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hengyu and Wang, Yuehao and Li, Chenxin and Cai, Ruisi and Wang, Kevin and Li, Wuyang and Molchanov, Pavlo and Wang, Peihao and Wang, Zhangyang}, title = {FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16336-16345} }
Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning-
[pdf]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Zhiyuan and Zhao, Yandan and Chen, Shen and Guo, Mingyi and Fu, Xinghe and Yao, Taiping and Ding, Shouhong and Wu, Yunsheng and Yuan, Li}, title = {Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12615-12625} }
T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lijun and Shi, Zhelun and Hu, Xuhao and Dong, Bowen and Qin, Yiran and Liu, Xihui and Sheng, Lu and Shao, Jing}, title = {T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13381-13392} }
Make It Count: Text-to-Image Generation with an Accurate Number of Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Binyamin_2025_CVPR, author = {Binyamin, Lital and Tewel, Yoad and Segev, Hilit and Hirsch, Eran and Rassin, Royi and Chechik, Gal}, title = {Make It Count: Text-to-Image Generation with an Accurate Number of Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13242-13251} }
TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Zhiying and Yang, Lei and Wen, Fuxi and Li, Jun}, title = {TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12048-12057} }
DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aiello_2025_CVPR, author = {Aiello, Emanuele and Michieli, Umberto and Valsesia, Diego and Ozay, Mete and Magli, Enrico}, title = {DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12480-12489} }
FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhonghang and Zhou, Kun and Wang, Changshuo and Lin, Wen-Yan and Lu, Jiangbo}, title = {FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15183-15193} }
Focusing on Tracks for Online Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Shim_2025_CVPR, author = {Shim, Kyujin and Ko, Kangwook and Yang, Yujin and Kim, Changick}, title = {Focusing on Tracks for Online Multi-Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11687-11696} }
Identity-preserving Distillation Sampling by Fixed-Point Iterator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, SeonHwa and Kim, Jiwon and Park, Soobin and Ahn, Donghoon and Kang, Jiwon and Kim, Seungryong and Jin, Kyong Hwan and Cha, Eunju}, title = {Identity-preserving Distillation Sampling by Fixed-Point Iterator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11115-11124} }
WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Potamias_2025_CVPR, author = {Potamias, Rolandos Alexandros and Zhang, Jinglei and Deng, Jiankang and Zafeiriou, Stefanos}, title = {WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12242-12254} }
BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koleilat_2025_CVPR, author = {Koleilat, Taha and Asgariandehkordi, Hojat and Rivaz, Hassan and Xiao, Yiming}, title = {BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14766-14776} }
MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Chharia_2025_CVPR, author = {Chharia, Aviral and Gou, Wenbo and Dong, Haoye}, title = {MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11590-11599} }
AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Astruc_2025_CVPR, author = {Astruc, Guillaume and Gonthier, Nicolas and Mallet, Cl\'ement and Landrieu, Loic}, title = {AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19530-19540} }
OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?-
[pdf]
[supp]
[bibtex]@InProceedings{Niu_2025_CVPR, author = {Niu, Junbo and Li, Yifei and Miao, Ziyang and Ge, Chunjiang and Zhou, Yuanhang and He, Qihao and Dong, Xiaoyi and Duan, Haodong and Ding, Shuangrui and Qian, Rui and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi}, title = {OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18902-18913} }
GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zixuan and Wang, Guangcong and Zhu, Jiahao and Lai, Jianhuang and Xie, Xiaohua}, title = {GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16325-16335} }
RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parikh_2025_CVPR, author = {Parikh, Chirag and Rawat, Deepti and T., Rakshitha R. and Ghosh, Tathagata and Sarvadevabhatla, Ravi Kiran}, title = {RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19002-19011} }
Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yiping and He, Xuehai and Wang, Kuan and Ma, Luyao and Yang, Jianwei and Wang, Shuohang and Du, Simon Shaolei and Shen, Yelong}, title = {Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13629-13638} }
LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiaoyu and Pan, Weihong and Bao, Chong and Zhang, Xiyu and Xiang, Xiaojun and Jiang, Hanqing and Bao, Hujun}, title = {LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16122-16132} }
Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Bangyan and Zhao, Zhenjun and Li, Haoang and Zhou, Yi and Zeng, Yingping and Li, Hao and Liu, Peidong}, title = {Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15823-15832} }
FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Fangyu and Chen, Yuhao}, title = {FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11051-11060} }
Take the Bull by the Horns: Learning to Segment Hard Samples-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuan and Kong, Jingyu and Wang, Yu and Duan, Yuping}, title = {Take the Bull by the Horns: Learning to Segment Hard Samples}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15642-15652} }
EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jagpal_2025_CVPR, author = {Jagpal, Diljeet and Chen, Xi and Namboodiri, Vinay P.}, title = {EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18219-18228} }
Reproducible Vision-Language Models Meet Concepts Out of Pre-Training-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ziliang and Huang, Xin and Fan, Xiaoxuan and Wang, Keze and Zhou, Yuyu and Guan, Quanlong and Lin, Liang}, title = {Reproducible Vision-Language Models Meet Concepts Out of Pre-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14701-14711} }
Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yariv_2025_CVPR, author = {Yariv, Guy and Kirstain, Yuval and Zohar, Amit and Sheynin, Shelly and Taigman, Yaniv and Adi, Yossi and Benaim, Sagie and Polyak, Adam}, title = {Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18198-18208} }
MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haoyuan and Wang, Zhenwei and Long, Xiaoxiao and Lin, Cheng and Hancke, Gerhard and Lau, Rynson W. H.}, title = {MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10985-10995} }
MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shaoming and Cai, Qing and Kong, Songqi and Tan, Runqing and Tong, Heng and Qiu, Shiji and Jiang, Yongguo and Liu, Zhi}, title = {MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16912-16921} }
Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xianrui and Cui, Yufei and Li, Jun and Chan, Antoni B.}, title = {Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20800-20809} }
Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yu and Zheng, Dian and Mo, Qijie and Lu, Renjie and Lin, Kun-Yu and Zheng, Wei-Shi}, title = {Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20350-20359} }
TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions-
[pdf]
[supp]
[bibtex]@InProceedings{Yu-Hang_2025_CVPR, author = {Wang, Yu-Hang and Guo, Junkang and Liu, Aolei and Wang, Kaihao and Wu, Zaitong and Liu, Zhenyu and Yin, Wenfei and Liu, Jian}, title = {TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15476-15485} }
Few-shot Personalized Scanpath Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Ruoyu and Xu, Jingyi and Mondal, Sounak and Le, Hieu and Zelinsky, Greg and Hoai, Minh and Samaras, Dimitris}, title = {Few-shot Personalized Scanpath Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13497-13507} }
Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiuming and Han, Jinru and Liu, Lihao and Aviles-Rivero, Angelica I. and Jiang, Chaokang and Liu, Zhe and Wang, Hesheng}, title = {Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17626-17636} }
Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Tan, Robby T.}, title = {Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14527-14537} }
Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Ting and Lin, Yi and Yu, Jun and Lou, Zhenyu and Cui, Qiongjie}, title = {Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12335-12346} }
ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Ling-An and Huang, Guohong and Wei, Yi-Lin and Gu, Shengbo and Tang, Yu-Ming and Meng, Jingke and Zheng, Wei-Shi}, title = {ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12358-12369} }
CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Pitawela_2025_CVPR, author = {Pitawela, Dileepa and Carneiro, Gustavo and Chen, Hsiang-Ting}, title = {CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15538-15548} }
ObjectMover: Generative Object Movement with Video Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Xin and Wang, Tianyu and Kim, Soo Ye and Guerrero, Paul and Chen, Xi and Liu, Qing and Lin, Zhe and Qi, Xiaojuan}, title = {ObjectMover: Generative Object Movement with Video Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17682-17691} }
MLLM-as-a-Judge for Image Safety without Human Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhenting and Hu, Shuming and Zhao, Shiyu and Lin, Xiaowen and Juefei-Xu, Felix and Li, Zhuowei and Han, Ligong and Subramanyam, Harihar and Chen, Li and Chen, Jianfa and Jiang, Nan and Lyu, Lingjuan and Ma, Shiqing and Metaxas, Dimitris N. and Jain, Ankit}, title = {MLLM-as-a-Judge for Image Safety without Human Labeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14657-14666} }
Learning to Filter Outlier Edges in Global SfM-
[pdf]
[supp]
[bibtex]@InProceedings{Damblon_2025_CVPR, author = {Damblon, Nicole and Pollefeys, Marc and Barath, Daniel}, title = {Learning to Filter Outlier Edges in Global SfM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11558-11568} }
Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection-
[pdf]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Xinjie and Li, Yuezun and Luo, Ao and Zhou, Jiaran and Dong, Junyu}, title = {Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19207-19217} }
KAC: Kolmogorov-Arnold Classifier for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Yusong and Liang, Zichen and Yang, Fei and Hou, Qibin and Liu, Xialei and Cheng, Ming-Ming}, title = {KAC: Kolmogorov-Arnold Classifier for Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15297-15307} }
BOOTPLACE: Bootstrapped Object Placement with Detection Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Hang and Zuo, Xinxin and Ma, Rui and Cheng, Li}, title = {BOOTPLACE: Bootstrapped Object Placement with Detection Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19294-19303} }
FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Objection Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR, author = {Dang, Chenxu and Duan, ZaiPeng and An, Pei and Zhang, Xinmin and Hu, Xuzhong and Ma, Jie}, title = {FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Objection Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17029-17038} }
Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ha_2025_CVPR, author = {Ha, Hyunho and Xiao, Lei and Richardt, Christian and Nguyen-Phuoc, Thu and Kim, Changil and Kim, Min H. and Lanman, Douglas and Khan, Numair}, title = {Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11275-11285} }
Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances-
[pdf]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Yi and Ren, Botao and Zhang, Peiyuan and Liu, Mingxin and Luo, Junwei and Zhang, Shaofeng and Da, Feipeng and Yan, Junchi and Yang, Xue}, title = {Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19283-19293} }
CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jungho and Cho, Suhwan and Kim, Taeoh and Jang, Ho-Deok and Lee, Minhyeok and Cha, Geonho and Wee, Dongyoon and Lee, Dogyoon and Lee, Sangyoun}, title = {CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16101-16110} }
Semantic and Sequential Alignment for Referring Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Feiyu and Fang, Hao and Li, Fangkai and Xu, Yanyu and Li, Yawei and Benini, Luca and Lu, Xiankai}, title = {Semantic and Sequential Alignment for Referring Video Object Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19067-19076} }
Continual SFT Matches Multimodal RLHF with Negative Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Ke and Wang, Yu and Sun, Yanpeng and Chen, Qiang and Liu, Jiangjiang and Zhang, Gang and Wang, Jingdong}, title = {Continual SFT Matches Multimodal RLHF with Negative Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14615-14624} }
Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Anqi and Zhu, Jingmin and Bailey, James and Gong, Mingming and Ke, Qiuhong}, title = {Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13876-13885} }
ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Chengyou and Xia, Changliang and Dang, Zhuohang and Wu, Weijia and Qian, Hangwei and Luo, Minnan}, title = {ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13284-13293} }
VEU-Bench: Towards Comprehensive Understanding of Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Bozheng and Wu, Yongliang and Lu, Yi and Yu, Jiashuo and Tang, Licheng and Cao, Jiawang and Zhu, Wenqing and Sun, Yuyang and Wu, Jay and Zhu, Wenbo}, title = {VEU-Bench: Towards Comprehensive Understanding of Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13671-13680} }
Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jinchang and Wang, Shaokang and Chen, Jintao and Li, Zhe and Jia, Peidong and Zhao, Fei and Xiang, Guoqing and Hao, Zhijian and Zhang, Shanghang and Xie, Xiaodong}, title = {Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18051-18061} }
Yo'Chameleon: Personalized Vision and Language Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Thao and Singh, Krishna Kumar and Shi, Jing and Bui, Trung and Lee, Yong Jae and Li, Yuheng}, title = {Yo'Chameleon: Personalized Vision and Language Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14438-14448} }
PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Shian and Xia, Menghan and Liu, Chang and Wang, Xintao and Wang, Jing and Wan, Pengfei and Zhang, Di and Ji, Xiangyang}, title = {PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17799-17809} }
FluxSpace: Disentangled Semantic Editing in Rectified Flow Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dalva_2025_CVPR, author = {Dalva, Yusuf and Venkatesh, Kavana and Yanardag, Pinar}, title = {FluxSpace: Disentangled Semantic Editing in Rectified Flow Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13083-13092} }
Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhipeng and Cheng, De and Jiang, Xinyang and Wang, Nannan and Li, Dongsheng and Gao, Xinbo}, title = {Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18584-18595} }
ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Petrov_2025_CVPR, author = {Petrov, Dmitry and Goyal, Pradyumn and Shivashok, Divyansh and Tao, Yuanming and Averkiou, Melinos and Kalogerakis, Evangelos}, title = {ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13305-13314} }
Auto-Encoded Supervision for Perceptual Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, MinKyu and Hyun, Sangeek and Jun, Woojin and Heo, Jae-Pil}, title = {Auto-Encoded Supervision for Perceptual Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17958-17968} }
Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2025_CVPR, author = {Gan, Yuan and Miao, Jiaxu and Wang, Yunze and Yang, Yi}, title = {Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13434-13444} }
Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Jiayi and Liu, Siyu and Liu, Zikun and Guo, Chun-Le and Park, Hyunhee and Wu, Ruiqi and Wang, Guoqing and Li, Chongyi}, title = {Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12700-12709} }
Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Safaei_2025_CVPR, author = {Safaei, Bardia and Siddiqui, Faizan and Xu, Jiacong and Patel, Vishal M. and Lo, Shao-Yuan}, title = {Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14247-14256} }
Gradient-Guided Annealing for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ballas_2025_CVPR, author = {Ballas, Aristotelis and Diou, Christos}, title = {Gradient-Guided Annealing for Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20558-20568} }
MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research-
[pdf]
[supp]
[bibtex]@InProceedings{Burgess_2025_CVPR, author = {Burgess, James and Nirschl, Jeffrey J. and Bravo-S{\'a}nchez, Laura and Lozano, Alejandro and Gupte, Sanket Rajan and Galaz-Montoya, Jesus G. and Zhang, Yuhui and Su, Yuchang and Bhowmik, Disha and Coman, Zachary and Hasan, Sarina M. and Johannesson, Alexandra and Leineweber, William D. and Nair, Malvika G. and Yarlagadda, Ridhi and Zuraski, Connor and Chiu, Wah and Cohen, Sarah and Hansen, Jan N. and Leonetti, Manuel D. and Liu, Chad and Lundberg, Emma and Yeung-Levy, Serena}, title = {MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19552-19564} }
Event-based Video Super-Resolution via State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Zeyu and Wang, Xinchao}, title = {Event-based Video Super-Resolution via State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12564-12574} }
Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hermosilla_2025_CVPR, author = {Hermosilla, Pedro and Stippel, Christian and Sick, Leon}, title = {Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14835-14844} }
VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chaoyu and Im, Eun Woo and Fazli, Pooyan}, title = {VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13723-13733} }
CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yuan and Xu, Qingshan and Cui, Jiequan and Zhou, Junbao and Zhang, Jing and Hong, Richang and Zhang, Hanwang}, title = {CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20135-20145} }
Paint by Inpaint: Learning to Add Image Objects by Removing Them First-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wasserman_2025_CVPR, author = {Wasserman, Navve and Rotstein, Noam and Ganz, Roy and Kimmel, Ron}, title = {Paint by Inpaint: Learning to Add Image Objects by Removing Them First}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18313-18324} }
PMA: Towards Parameter-Efficient Point Cloud Understanding via Point Mamba Adapter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zha_2025_CVPR, author = {Zha, Yaohua and Wang, Yanzi and Guo, Hang and Wang, Jinpeng and Dai, Tao and Chen, Bin and Ouyang, Zhihao and Xue, Yuerong and Chen, Ke and Xia, Shu-Tao}, title = {PMA: Towards Parameter-Efficient Point Cloud Understanding via Point Mamba Adapter}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16976-16986} }
LC-Mamba: Local and Continuous Mamba with Shifted Windows for Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Min Wu and Rhee, Chae Eun}, title = {LC-Mamba: Local and Continuous Mamba with Shifted Windows for Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17671-17681} }
Zero-Shot Head Swapping in Real-World Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Taewoong and Jeong, Sohyun and Jang, Hyojin and Choo, Jaegul}, title = {Zero-Shot Head Swapping in Real-World Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10805-10814} }
CAV-MAE Sync: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Araujo_2025_CVPR, author = {Araujo, Edson and Rouditchenko, Andrew and Gong, Yuan and Bhati, Saurabhchand and Thomas, Samuel and Kingsbury, Brian and Karlinsky, Leonid and Feris, Rogerio and Glass, James R. and Kuehne, Hilde}, title = {CAV-MAE Sync: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18794-18803} }
COBRA: COmBinatorial Retrieval Augmentation for Few-Shot Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2025_CVPR, author = {Das, Arnav M. and Bhatt, Gantavya and Kumari, Lilly and Verma, Sahil and Bilmes, Jeff}, title = {COBRA: COmBinatorial Retrieval Augmentation for Few-Shot Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20534-20546} }
ProbeSDF: Light Field Probes For Neural Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Toussaint_2025_CVPR, author = {Toussaint, Briac and Thomas, Diego and Franco, Jean-S{\'e}bastien}, title = {ProbeSDF: Light Field Probes For Neural Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11026-11035} }
Hybrid Concept Bottleneck Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yang and Zhang, Tianwei and Gu, Shi}, title = {Hybrid Concept Bottleneck Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20179-20189} }
Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Da-Wei and Cai, Zi-Wen and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan}, title = {Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20547-20557} }
RORem: Training a Robust Object Remover with Human-in-the-Loop-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ruibin and Yang, Tao and Guo, Song and Zhang, Lei}, title = {RORem: Training a Robust Object Remover with Human-in-the-Loop}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14024-14035} }
All Languages Matter: Evaluating LMMs on Culturally Diverse 100 Languages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vayani_2025_CVPR, author = {Vayani, Ashmal and Dissanayake, Dinura and Watawana, Hasindri and Ahsan, Noor and Sasikumar, Nevasini and Thawakar, Omkar and Ademtew, Henok Biadglign and Hmaiti, Yahya and Kumar, Amandeep and Kukreja, Kartik and Maslych, Mykola and Al Ghallabi, Wafa and Mihaylov, Mihail Minkov and Qin, Chao and Shaker, Abdelrahman M. and Zhang, Mike and Ihsani, Mahardika Krisna and Esplana, Amiel Gian and Gokani, Monil and Mirkin, Shachar and Singh, Harsh and Srivastava, Ashay and Hamerlik, Endre and Izzati, Fathinah Asma and Maani, Fadillah Adamsyah and Cavada, Sebastian and Chim, Jenny and Gupta, Rohit and Manjunath, Sanjay and Zhumakhanova, Kamila and Rabevohitra, Feno Heriniaina and Amirudin, Azril Hafizi and Ridzuan, Muhammad and Kareem, Daniya Najiha Abdul and More, Ketan Pravin and Li, Kunyang and Shakya, Pramesh and Saad, Muhammad and Ghasemaghaei, Amirpouya and Djanibekov, Amirbek and Azizov, Dilshod and Jankovic, Branislava and Bhatia, Naman and Cabrera, Alvaro and Obando-Ceron, Johan and Otieno, Olympiah and Farestam, Febian and Rabbani, Muztoba and Ballah, Sanoojan and Sanjeev, Santosh and Shtanchaev, Abduragim and Fatima, Maheen and Nguyen, Thao and Kareem, Amrin and Aremu, Toluwani and Xavier, Nathan Augusto Zacarias and Bhatkal, Amit and Toyin, Hawau Olamide and Chadha, Aman and Cholakkal, Hisham and Anwer, Rao Muhammad and Felsberg, Michael and Laaksonen, Jorma and Solorio, Thamar and Choudhury, Monojit and Laptev, Ivan and Shah, Mubarak and Khan, Salman and Khan, Fahad Shahbaz}, title = {All Languages Matter: Evaluating LMMs on Culturally Diverse 100 Languages}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19565-19575} }
Video-Bench: Human-Aligned Video Generation Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Hui and Li, Siyuan and Chen, Jiaqi and Yuan, Yiwen and Wu, Yuling and Deng, Yufan and Leong, Chak Tou and Du, Hanwen and Fu, Junchen and Li, Youhua and Zhang, Jie and Zhang, Chi and Li, Li-jia and Ni, Yongxin}, title = {Video-Bench: Human-Aligned Video Generation Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18858-18868} }
MergeVQ: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Siyuan and Zhang, Luyuan and Wang, Zedong and Tian, Juanxi and Tan, Cheng and Liu, Zicheng and Yu, Chang and Xie, Qingsong and Lu, Haonan and Wang, Haoqian and Lei, Zhen}, title = {MergeVQ: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19713-19723} }
Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiaming and Ye, Junhong and Ma, Xingjun and Li, Yige and Yang, Yunfan and Chen, Yunhao and Sang, Jitao and Yeung, Dit-Yan}, title = {Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19900-19909} }
Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Hoang Chuong and Mao, Wei and Alvarez, Jose M. and Liu, Miaomiao}, title = {Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11472-11481} }
IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Chun and Wei, Xiaofei and Zeng, Zixuan and Yao, Yuxuan and Zhang, Li}, title = {IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10943-10952} }
InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Sirui and Ling, Hung Yu and Wang, Yu-Xiong and Gui, Liang-Yan}, title = {InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12266-12277} }
Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Kunyu and Fu, Xueyang and Lu, Xin and Ge, Chengjie and Cao, Chengzhi and Zhai, Wei and Zha, Zheng-Jun}, title = {Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10577-10586} }
A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Xin and Zhao, Bingchen and Chen, Yilun and Pang, Jiangmiao and Qi, Xiaojuan}, title = {A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12143-12154} }
Visual Agentic AI for Spatial Reasoning with a Dynamic API-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marsili_2025_CVPR, author = {Marsili, Damiano and Agrawal, Rohun and Yue, Yisong and Gkioxari, Georgia}, title = {Visual Agentic AI for Spatial Reasoning with a Dynamic API}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19446-19455} }
Feature Spectrum Learning for Remote Sensing Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2025_CVPR, author = {Zang, Qi and Zhao, Dong and Wang, Shuang and Quan, Dou and Zhong, Zhun}, title = {Feature Spectrum Learning for Remote Sensing Change Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12647-12657} }
DriveDreamer4D: World Models Are Effective Data Machines for 4D Driving Scene Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Guosheng and Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Zhang, Xueyang and Wang, Yida and Huang, Guan and Chen, Xinze and Wang, Boyuan and Zhang, Youyi and Mei, Wenjun and Wang, Xingang}, title = {DriveDreamer4D: World Models Are Effective Data Machines for 4D Driving Scene Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12015-12026} }
LoKi: Low-dimensional KAN for Efficient Fine-tuning Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Xuan and Pan, Renjie and Yang, Hua}, title = {LoKi: Low-dimensional KAN for Efficient Fine-tuning Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14869-14880} }
Dr. Splat: Directly Referring 3D Gaussian Splatting via Direct Language Embedding Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jun-Seong_2025_CVPR, author = {Jun-Seong, Kim and Kim, GeonU and Yu-Ji, Kim and Wang, Yu-Chiang Frank and Choe, Jaesung and Oh, Tae-Hyun}, title = {Dr. Splat: Directly Referring 3D Gaussian Splatting via Direct Language Embedding Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14137-14146} }
Consistent Normal Orientation for 3D Point Clouds via Least Squares on Delaunay Graph-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Rao and Zheng, Jianmin and Yu, Liang}, title = {Consistent Normal Orientation for 3D Point Clouds via Least Squares on Delaunay Graph}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16932-16942} }
ATA: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yizhe and Sun, Zhimin and Du, Yuzhen and Yi, Ran and Lu, Guangben and Hu, Teng and Li, Luying and Ma, Lizhuang and Zou, Fangyuan}, title = {ATA: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18335-18345} }
Optimizing for the Shortest Path in Denoising Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ping and Zhang, Xingpeng and Liu, Zhaoxiang and Hu, Huan and Liu, Xiang and Wang, Kai and Wang, Min and Qian, Yanlin and Lian, Shiguo}, title = {Optimizing for the Shortest Path in Denoising Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18021-18030} }
Antidote: A Unified Framework for Mitigating LVLM Hallucinations in Counterfactual Presupposition and Object Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yuanchen and Zhang, Lu and Yao, Hang and Du, Junlong and Yan, Ke and Ding, Shouhong and Wu, Yunsheng and Li, Xiaoqiang}, title = {Antidote: A Unified Framework for Mitigating LVLM Hallucinations in Counterfactual Presupposition and Object Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14646-14656} }
Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jae Hyeon and Jeon, Joo Hyeon and Lee, Jae Yun and Ahn, Sangyeon and Cha, Min Hee and Kim, Min Geol and Nam, Hyeok and Cho, Sung In}, title = {Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20602-20611} }
VODiff: Controlling Object Visibility Order in Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Dong and Jia, Jinyuan and Liu, Yuhao and Ke, Zhanghan and Fu, Hongbo and Lau, Rynson W. H.}, title = {VODiff: Controlling Object Visibility Order in Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18379-18389} }
CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiahao and Ma, Weijian and Li, Xueyang and Lou, Yunzhong and Zhou, Guichun and Zhou, Xiangdong}, title = {CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18563-18573} }
Leveraging SD Map to Augment HD Map-based Trajectory Prediction-
[pdf]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Zhiwei and Ding, Ran and Li, Wei and Zhang, Peng and Tang, Guobin and Guo, Jia}, title = {Leveraging SD Map to Augment HD Map-based Trajectory Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17219-17228} }
ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Tao and Dong, Qiulei}, title = {ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16829-16838} }
Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jie and Qin, Tiexin and Liu, Hui and Shi, Yilei and Mou, Lichao and Zhu, Xiao Xiang and Wang, Shiqi and Li, Haoliang}, title = {Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15560-15569} }
Composing Parts for Expressive Object Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Rangwani_2025_CVPR, author = {Rangwani, Harsh and Agarwal, Aishwarya and Kulkarni, Kuldeep and Babu, R. Venkatesh and Karanam, Srikrishna}, title = {Composing Parts for Expressive Object Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13209-13219} }
CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dongkun and Liang, Jiaming and Guo, Ke and Lu, Sha and Wang, Qi and Xiong, Rong and Miao, Zhenwei and Wang, Yue}, title = {CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17239-17248} }
Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Yiming and Xu, Zhu and Liu, Yang}, title = {Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18521-18530} }
SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xuesong and Huang, Linjiang and Ma, Tao and Fang, Rongyao and Shi, Shaoshuai and Li, Hongsheng}, title = {SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12068-12077} }
Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Ji and Hao, Fangwei and Yu, Mingyang and Kong, Desheng and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping}, title = {Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19271-19282} }
DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Wei and Guo, Xi and Tang, Weixuan and Huang, Tingxuan and Wang, Chiyu and Ding, Chenjing}, title = {DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17187-17196} }
Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tybl_2025_CVPR, author = {Tybl, Ondrej and Neumann, Lukas}, title = {Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14881-14890} }
Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Guanyao and Liu, Haoyu and Fu, Hongming and Peng, Yichuan and Liu, Jinyuan and Fan, Xin and Liu, Risheng}, title = {Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17882-17891} }
RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Fan and Tian, Zhuo and Fan, Xuefeng and Zhou, Xiaoyi}, title = {RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20665-20674} }
LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion.-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Muchen and Christen, Sammy and Wan, Chengde and Cai, Yujun and Liao, Renjie and Sigal, Leonid and Ma, Shugao}, title = {LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion.}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17416-17425} }
Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Ventura_2025_CVPR, author = {Ventura, Lucas and Yang, Antoine and Schmid, Cordelia and Varol, G\"ul}, title = {Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18947-18958} }
Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Fuyun and Zhang, Tong and Wang, Yuanzhi and Qiu, Yide and Liu, Xin and Guo, Xu and Cui, Zhen}, title = {Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20416-20426} }
Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Ji and Guan, Banglei and Liu, Zibin and Kneip, Laurent}, title = {Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11515-11524} }
Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_CVPR, author = {You, Zhiyuan and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao}, title = {Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14483-14494} }
Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jialai and Wu, Yuxiao and Xu, Weiye and Huang, Yating and Zhang, Chao and Li, Zongpeng and Xu, Mingwei and Liang, Zhenkai}, title = {Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20103-20112} }
Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zining and Guan, Tongkun and Fu, Pei and Duan, Chen and Jiang, Qianyi and Guo, Zhentao and Guo, Shan and Luo, Junfeng and Shen, Wei and Yang, Xiaokang}, title = {Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14460-14471} }
Mamba-Reg: Vision Mamba Also Needs Registers-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Feng and Wang, Jiahao and Ren, Sucheng and Wei, Guoyizhe and Mei, Jieru and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang}, title = {Mamba-Reg: Vision Mamba Also Needs Registers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14944-14953} }
Visual Persona: Foundation Model for Full-Body Human Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Jisu and Son, Soowon and Xu, Zhan and Shi, Jing and Liu, Difan and Liu, Feng and Kim, Seungryong and Zhou, Yang}, title = {Visual Persona: Foundation Model for Full-Body Human Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18630-18641} }
SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiahui and Zhan, Fangneng and Shao, Ling and Lu, Shijian}, title = {SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11167-11176} }
MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jianwei and Li, Xin and Yang, Fan and Zhai, Qiang and Luo, Ao and Zhao, Yang and Cheng, Hong and Fu, Huazhu}, title = {MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20789-20799} }
Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Weiwei and Liu, Junzhuo and Ren, Yuanyuan and Zheng, Yuchen and Liu, Yahao and Li, Wen}, title = {Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15486-15496} }
MoDec-GS: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2025_CVPR, author = {Kwak, Sangwoon and Kim, Joonsoo and Jeong, Jun Young and Cheong, Won-Sik and Oh, Jihyong and Kim, Munchurl}, title = {MoDec-GS: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11338-11348} }
DaCapo: Score Distillation as Stacked Bridge for Fast and High-quality 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yufei and Liao, Bangyan and Hu, Yuqi and Lin, Haitao and Wu, Lirong and Li, Siyuan and Tan, Cheng and Liu, Zicheng and Liu, Yunfan and Zang, Zelin and Yu, Chang and Lei, Zhen}, title = {DaCapo: Score Distillation as Stacked Bridge for Fast and High-quality 3D Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16304-16313} }
Number it: Temporal Grounding Videos like Flipping Manga-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yongliang and Hu, Xinting and Sun, Yuyang and Zhou, Yizhou and Zhu, Wenbo and Rao, Fengyun and Schiele, Bernt and Yang, Xu}, title = {Number it: Temporal Grounding Videos like Flipping Manga}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13754-13765} }
SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pallotta_2025_CVPR, author = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Li, Shuai and Zatsarynna, Olga and Gall, Juergen}, title = {SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13787-13797} }
HUSH: Holistic Panoramic 3D Scene Understanding using Spherical Harmonics-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jongsung and Park, Harin and Lee, Byeong-Uk and Joo, Kyungdon}, title = {HUSH: Holistic Panoramic 3D Scene Understanding using Spherical Harmonics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16599-16608} }
SkillMimic: Learning Basketball Interaction Skills from Demonstrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinhuai and Zhao, Qihan and Yu, Runyi and Tsui, Hok Wai and Zeng, Ailing and Lin, Jing and Luo, Zhengyi and Yu, Jiwen and Li, Xiu and Chen, Qifeng and Zhang, Jian and Zhang, Lei and Tan, Ping}, title = {SkillMimic: Learning Basketball Interaction Skills from Demonstrations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17540-17549} }
RGBAvatar: Reduced Gaussian Blendshapes for Online Modeling of Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Linzhou and Li, Yumeng and Weng, Yanlin and Zheng, Youyi and Zhou, Kun}, title = {RGBAvatar: Reduced Gaussian Blendshapes for Online Modeling of Head Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10747-10757} }
EEE-Bench: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ming and Zhong, Jike and Chen, Tianle and Lai, Yuxiang and Psounis, Konstantinos}, title = {EEE-Bench: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13337-13349} }
A Unified Framework for Heterogeneous Semi-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heidari_2025_CVPR, author = {Heidari, Marzi and Alchihabi, Abdullah and Yan, Hao and Guo, Yuhong}, title = {A Unified Framework for Heterogeneous Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15371-15380} }
Free360: Layered Gaussian Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_CVPR, author = {Bao, Chong and Zhang, Xiyu and Yu, Zehao and Shi, Jiale and Zhang, Guofeng and Peng, Songyou and Cui, Zhaopeng}, title = {Free360: Layered Gaussian Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16377-16387} }
Open Ad-hoc Categorization with Contextualized Feature Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zilin and Mo, Sangwoo and Yu, Stella X. and Behpour, Sima and Ren, Liu}, title = {Open Ad-hoc Categorization with Contextualized Feature Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15108-15117} }
Dynamic Updates for Language Adaptation in Visual-Language Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaohai and Zhong, Bineng and Liang, Qihua and Mo, Zhiyi and Nong, Jian and Song, Shuxiang}, title = {Dynamic Updates for Language Adaptation in Visual-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19165-19174} }
Multi-focal Conditioned Latent Diffusion for Person Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiaqi and Zhang, Jichao and Rota, Paolo and Sebe, Nicu}, title = {Multi-focal Conditioned Latent Diffusion for Person Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16019-16028} }
Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jiangyi and Zhao, Na}, title = {Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20329-20339} }
Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Zhiqi and Wang, Junjie and Zhao, Lingling and Wang, Chunyu}, title = {Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19251-19260} }
OccMamba: Semantic Occupancy Prediction with State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Heng and Hou, Yuenan and Xing, Xiaohan and Ma, Yuexin and Sun, Xiao and Zhang, Yanyong}, title = {OccMamba: Semantic Occupancy Prediction with State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11949-11959} }
Cheb-GR: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jinxi and Li, He and Du, Bo and Ye, Mang}, title = {Cheb-GR: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19261-19270} }
Spotting the Unexpected (STU): A 3D LiDAR Dataset for Anomaly Segmentation in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nekrasov_2025_CVPR, author = {Nekrasov, Alexey and Burdorf, Malcolm and Worrall, Stewart and Leibe, Bastian and Perez, Julie Stephany Berrio}, title = {Spotting the Unexpected (STU): A 3D LiDAR Dataset for Anomaly Segmentation in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11875-11885} }
Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Ji Hyeok and Kim, Eun Tae and Kim, Seoyeon and Lee, Joo Ho and Kim, Bumsoo and Chang, Buru}, title = {Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14257-14267} }
GCC: Generative Color Constancy via Diffusing a Color Checker-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Chen-Wei and Fan, Cheng-De and Chang, Chia-Che and Lo, Yi-Chen and Tseng, Yu-Chee and Huang, Jiun-Long and Liu, Yu-Lun}, title = {GCC: Generative Color Constancy via Diffusing a Color Checker}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10868-10878} }
On Denoising Walking Videos for Gait Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Dongyang and Fan, Chao and Ma, Jingzhe and Zhou, Jingkai and Chen, Weihua and Yu, Shiqi}, title = {On Denoising Walking Videos for Gait Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12347-12357} }
Conformal Prediction for Zero-Shot Models-
[pdf]
[supp]
[bibtex]@InProceedings{Silva-Rodriguez_2025_CVPR, author = {Silva-Rodr{\'\i}guez, Julio and Ben Ayed, Ismail and Dolz, Jose}, title = {Conformal Prediction for Zero-Shot Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19931-19941} }
PhysAnimator: Physics-Guided Generative Cartoon Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Tianyi and Zhao, Yiwei and Jiang, Ying and Jiang, Chenfanfu}, title = {PhysAnimator: Physics-Guided Generative Cartoon Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10793-10804} }
FIMA-Q: Post-Training Quantization for Vision Transformers by Fisher Information Matrix Approximation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Zhuguanyu and Wang, Shihe and Zhang, Jiayi and Chen, Jiaxin and Wang, Yunhong}, title = {FIMA-Q: Post-Training Quantization for Vision Transformers by Fisher Information Matrix Approximation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14891-14900} }
BACON: Improving Clarity of Image Captions via Bag-of-Concept Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhantao and Feng, Ruili and Yan, Keyu and Wang, Huangji and Wang, Zhicai and Zhu, Shangwen and Zhang, Han and Xiao, Jie and Wu, Pingyu and Zhu, Kai and Chen, Jixuan and Xie, Chen-Wei and Yang, Yue and Zhang, Hongyang and Liu, Yu and Cheng, Fan}, title = {BACON: Improving Clarity of Image Captions via Bag-of-Concept Graphs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14380-14389} }
VasTSD: Learning 3D Vascular Tree-state Space Diffusion Model for Angiography Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhifeng and Yi, Renjiao and Wen, Xin and Zhu, Chenyang and Xu, Kai}, title = {VasTSD: Learning 3D Vascular Tree-state Space Diffusion Model for Angiography Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15693-15702} }
PanSplat: 4K Panorama Synthesis with Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Cheng and Xu, Haofei and Wu, Qianyi and Gambardella, Camilo Cruz and Phung, Dinh and Cai, Jianfei}, title = {PanSplat: 4K Panorama Synthesis with Feed-Forward Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11437-11447} }
WISNet: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shifan and Zhu, Hongzi and He, Yinan and Guo, Minyi and Lou, Ziyang and Chang, Shan}, title = {WISNet: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15076-15085} }
MixerMDM: Learnable Composition of Human Motion Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruiz-Ponce_2025_CVPR, author = {Ruiz-Ponce, Pablo and Barquero, German and Palmero, Cristina and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos\'e}, title = {MixerMDM: Learnable Composition of Human Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12380-12390} }
Hand-held Object Reconstruction from RGB Video with Dynamic Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Shijian and Ye, Qi and Xie, Rengan and Huo, Yuchi and Chen, Jiming}, title = {Hand-held Object Reconstruction from RGB Video with Dynamic Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12220-12230} }
AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guan_2025_CVPR, author = {Guan, Jiazhi and Wang, Kaisiyuan and Xu, Zhiliang and Yang, Quanwei and Sun, Yasheng and He, Shengyi and Liang, Borong and Cao, Yukang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wang, Jingdong and Zhao, Youjian and Zhou, Hang and Liu, Ziwei}, title = {AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10678-10689} }
Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jihan and Yang, Shusheng and Gupta, Anjali W. and Han, Rilyn and Fei-Fei, Li and Xie, Saining}, title = {Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10632-10643} }
A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Wangbo and Han, Yizeng and Tang, Jiasheng and Li, Zhikai and Song, Yibing and Wang, Kai and Wang, Zhangyang and You, Yang}, title = {A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19814-19824} }
SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jaerin and Jung, Daniel Sungho and Lee, Kanggeon and Lee, Kyoung Mu}, title = {SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13021-13030} }
Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gerogiannis_2025_CVPR, author = {Gerogiannis, Dimitrios and Papantoniou, Foivos Paraperas and Potamias, Rolandos Alexandros and Lattas, Alexandros and Zafeiriou, Stefanos}, title = {Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10770-10782} }
Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2025_CVPR, author = {Ryu, Hyeonggon and Kim, Seongyu and Chung, Joon Son and Senocak, Arda}, title = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13540-13549} }
Structure from Collision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaneko_2025_CVPR, author = {Kaneko, Takuhiro}, title = {Structure from Collision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16314-16324} }
Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Henghui and Li, Guangyao and Zhou, Chang and Zhang, Chunjie and Zhao, Alan and Hu, Di}, title = {Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18804-18814} }
Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Le and Zheng, Ziwei and Chen, Boxu and Zhao, Zhengyu and Lin, Chenhao and Shen, Chao}, title = {Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14635-14645} }
OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Bingzhi and Fu, Sisi and Fang, Xiaocheng and Cai, Jieyi and Zhang, Boya and Lu, Minhua and Liu, Yishu}, title = {OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15570-15579} }
SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Hess_2025_CVPR, author = {Hess, Georg and Lindstr\"om, Carl and Fatemi, Maryam and Petersson, Christoffer and Svensson, Lennart}, title = {SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11982-11992} }
Audio-Visual Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Ruohao and Ying, Xianghua and Chen, Yaru and Niu, Dantong and Li, Guangyao and Qu, Liao and Qi, Yanyu and Zhou, Jinxing and Xing, Bowei and Yue, Wenzhen and Shi, Ji and Wang, Qixun and Zhang, Peiliang and Liang, Buwen}, title = {Audio-Visual Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13550-13560} }
UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinqiao and Xu, Hao and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12231-12241} }
RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Gadot_2025_CVPR, author = {Gadot, Uri and Shocher, Assaf and Mannor, Shie and Chechik, Gal and Hallak, Assaf}, title = {RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12533-12542} }
Recognition-Synergistic Scene Text Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Zhengyao and Lyu, Pengyuan and Wu, Jingjing and Zhang, Chengquan and Yu, Jun and Lu, Guangming and Pei, Wenjie}, title = {Recognition-Synergistic Scene Text Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13104-13113} }
WildAvatar: Learning In-the-wild 3D Avatars from the Web-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zihao and Hu, Shoukang and Wang, Guangcong and Liu, Tianqi and Zang, Yuhang and Cao, Zhiguo and Li, Wei and Liu, Ziwei}, title = {WildAvatar: Learning In-the-wild 3D Avatars from the Web}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15963-15975} }
Rectified Diffusion Guidance for Conditional Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Mengfei and Xue, Nan and Shen, Yujun and Yi, Ran and Gong, Tieliang and Liu, Yong-Jin}, title = {Rectified Diffusion Guidance for Conditional Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13371-13380} }
IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Can and Lee, Gim Hee}, title = {IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12132-12142} }
RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Xin and Yu, Lei and Li, Xin and Tu, Zhijun and Chen, Hanting and Hu, Jie and Chen, Zhibo}, title = {RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12923-12933} }
OSV: One Step is Enough for High-Quality Image to Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR, author = {Mao, Xiaofeng and Jiang, Zhengkai and Wang, Fu-yun and Zhang, Jiangning and Chen, Hao and Chi, Mingmin and Wang, Yabiao and Luo, Wenhan}, title = {OSV: One Step is Enough for High-Quality Image to Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12585-12594} }
Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Siyuan and Sun, Yuan and Peng, Dezhong and Liu, Zheng and Song, Xiaomin and Hu, Peng}, title = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20747-20756} }
GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yuchen and Zhao, Shanhui and Yu, Tao and Wen, Hao and Va, Samith and Xu, Mengwei and Li, Yuanchun and Zhang, Chongyang}, title = {GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19477-19486} }
Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Tian and Zhang, Huixin and Parashar, Shubham and Kong, Shu}, title = {Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15086-15097} }
RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2025_CVPR, author = {Qiao, Yuanjian and Shao, Mingwen and Meng, Lingzhuang and Xu, Kai}, title = {RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11177-11186} }
4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Chaoyang and Zhuang, Peiye and Ngo, Tuan Duc and Menapace, Willi and Siarohin, Aliaksandr and Vasilkovsky, Michael and Skorokhodov, Ivan and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying}, title = {4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17723-17732} }
Z-Magic: Zero-shot Multiple Attributes Guided Image Creator-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming}, title = {Z-Magic: Zero-shot Multiple Attributes Guided Image Creator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18390-18400} }
On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach-
[pdf]
[bibtex]@InProceedings{Tong_2025_CVPR, author = {Tong, Baoshun and Lai, Hanjiang and Pan, Yan and Yin, Jian}, title = {On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19921-19930} }
Towards General Visual-Linguistic Face Forgery Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Ke and Chen, Shen and Yao, Taiping and Zhou, Ziyin and Ji, Jiayi and Sun, Xiaoshuai and Lin, Chia-Wen and Ji, Rongrong}, title = {Towards General Visual-Linguistic Face Forgery Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19576-19586} }
Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Feng and Ma, Haoyu and He, Zecheng and Hou, Tingbo and Hou, Ji and Li, Kunpeng and Dai, Xiaoliang and Juefei-Xu, Felix and Azadi, Samaneh and Sinha, Animesh and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana}, title = {Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13146-13156} }
LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Tiantian and Zhang, Jinrui and Wang, Qingni and Wang, Teng and Duan, Jinming and Zheng, Feng}, title = {LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18959-18969} }
Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhihe and Luo, Xufang and Han, Dongqi and Xu, Yunjian and Li, Dongsheng}, title = {Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10610-10620} }
Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hoogeboom_2025_CVPR, author = {Hoogeboom, Emiel and Mensink, Thomas and Heek, Jonathan and Lamerigts, Kay and Gao, Ruiqi and Salimans, Tim}, title = {Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18062-18071} }
STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection-
[pdf]
[supp]
[bibtex]@InProceedings{Velayudhan_2025_CVPR, author = {Velayudhan, Divya and Ahmed, Abdelfatah and Alansari, Mohamad and Gour, Neha and Behouch, Abderaouf and Hassan, Taimur and Wasim, Syed Talal and Maalej, Nabil and Naseer, Muzammal and Gall, Juergen and Bennamoun, Mohammed and Damiani, Ernesto and Werghi, Naoufel}, title = {STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20767-20777} }
Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lei and Li, Senmao and Yang, Fei and Wang, Jianye and Zhang, Ziheng and Liu, Yuhan and Wang, Yaxing and Yang, Jian}, title = {Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12880-12890} }
Complexity Experts are Task-Discriminative Learners for Any Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zamfir_2025_CVPR, author = {Zamfir, Eduard and Wu, Zongwei and Mehta, Nancy and Tan, Yuedong and Paudel, Danda Pani and Zhang, Yulun and Timofte, Radu}, title = {Complexity Experts are Task-Discriminative Learners for Any Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12753-12763} }
Generative Omnimatte: Learning to Decompose Video into Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Yao-Chih and Lu, Erika and Rumbley, Sarah and Geyer, Michal and Huang, Jia-Bin and Dekel, Tali and Cole, Forrester}, title = {Generative Omnimatte: Learning to Decompose Video into Layers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12522-12532} }
5%>100%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Dongshuo and Hu, Leiyi and Li, Bin and Zhang, Youqun and Yang, Xue}, title = {5\%\ensuremath{>}100\%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20071-20081} }
Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Wenbing and Wang, Lidong and Zhou, Ziqing and Wang, Chengjie and Pan, Yurui and Zhang, Ruoyi and Chen, Zhuhao and Cheng, Linjie and Gao, Bin-Bin and Zhang, Jiangning and Gan, Zhenye and Wang, Yuxie and Chen, Yulong and Qian, Shuguang and Chi, Mingmin and Peng, Bo and Ma, Lizhuang}, title = {Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15214-15223} }
ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Afane_2025_CVPR, author = {Afane, Mohamed and Ebbrecht, Gabrielle and Wang, Ying and Chen, Juntao and Farooq, Junaid}, title = {ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20427-20436} }
Decoupled Motion Expression Video Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Hao and Cong, Runmin and Lu, Xiankai and Zhou, Xiaofei and Kwong, Sam and Zhang, Wei}, title = {Decoupled Motion Expression Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13821-13831} }
K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2025_CVPR, author = {Ouyang, Ziheng and Li, Zhen and Hou, Qibin}, title = {K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13041-13050} }
WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zongjian and Lin, Bin and Ye, Yang and Chen, Liuhan and Cheng, Xinhua and Yuan, Shenghai and Yuan, Li}, title = {WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17778-17788} }
XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Fengxiang and Wang, Hongzhen and Guo, Zonghao and Wang, Di and Wang, Yulin and Chen, Mingshuo and Ma, Qiang and Lan, Long and Yang, Wenjing and Zhang, Jing and Liu, Zhiyuan and Sun, Maosong}, title = {XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14325-14336} }
Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berisha_2025_CVPR, author = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul}, title = {Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20082-20091} }
StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Ruojun and Xi, Weijie and Wang, XiaoDi and Mao, Yongbo and Cheng, Zach}, title = {StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18260-18269} }
Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kenkun and Fu, Yurong and Yuan, Weihao and Lin, Jing and Li, Peihao and Gu, Xiaodong and Qiu, Lingteng and Wang, Haoqian and Dong, Zilong and Han, Xiaoguang}, title = {Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17529-17539} }
AMO Sampler: Enhancing Text Rendering with Overshooting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Xixi and Xu, Keyang and Liu, Bo and Liu, Qiang and Fei, Hongliang}, title = {AMO Sampler: Enhancing Text Rendering with Overshooting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13157-13166} }
ImViD: Immersive Volumetric Videos for Enhanced VR Engagement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhengxian and Pan, Shi and Wang, Shengqi and Wang, Haoxiang and Lin, Li and Li, Guanjun and Wen, Zhengqi and Lin, Borong and Tao, Jianhua and Yu, Tao}, title = {ImViD: Immersive Volumetric Videos for Enhanced VR Engagement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16554-16564} }
I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2025_CVPR, author = {Gui, Dongnan and Guo, Xun and Zhou, Wengang and Lu, Yan}, title = {I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12595-12604} }
Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs-
[pdf]
[supp]
[bibtex]@InProceedings{Victorica_2025_CVPR, author = {Victorica, Mauricio Byrd and D\'an, Gy\"orgy and Sandberg, Henrik}, title = {Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20360-20369} }
OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation-
[pdf]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Xiao and Qin, Yulei and Zhou, Wengang and Li, Hongsheng and Li, Houqiang}, title = {OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15245-15254} }
Show and Segment: Universal Medical Image Segmentation via In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Yunhe and Liu, Di and Li, Zhuowei and Li, Yunsheng and Chen, Dongdong and Zhou, Mu and Metaxas, Dimitris N.}, title = {Show and Segment: Universal Medical Image Segmentation via In-Context Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20830-20840} }
CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Cheng and Wei, Jiacheng and Chen, Tianrun and Zhang, Chi and Yang, Xiaofeng and Zhang, Shangzhan and Yang, Bingchen and Foo, Chuan-Sheng and Lin, Guosheng and Huang, Qixing and Liu, Fayao}, title = {CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11073-11082} }
Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2025_CVPR, author = {Alzayer, Hadi and Henzler, Philipp and Barron, Jonathan T. and Huang, Jia-Bin and Srinivasan, Pratul P. and Verbin, Dor}, title = {Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10933-10942} }
DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xin and Gong, Dong}, title = {DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13220-13230} }
GENIUS: A Generative Framework for Universal Multimodal Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sungyeon and Zhu, Xinliang and Lin, Xiaofan and Bastan, Muhammet and Gray, Douglas and Kwak, Suha}, title = {GENIUS: A Generative Framework for Universal Multimodal Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19659-19669} }
SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boss_2025_CVPR, author = {Boss, Mark and Huang, Zixuan and Vasishta, Aaryaman and Jampani, Varun}, title = {SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16240-16250} }
Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Haoyu and Wang, Le and Zhou, Sanping and Tian, Jingyi and Qin, Zheng and Wang, Yabing and Hua, Gang and Tang, Wei}, title = {Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13350-13360} }
EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.}, title = {EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14923-14933} }
A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2025_CVPR, author = {Tu, Keyu and Huang, Mengqi and Chen, Zhuowei and Mao, Zhendong}, title = {A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18476-18485} }
ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Athar_2025_CVPR, author = {Athar, Ali and Deng, Xueqing and Chen, Liang-Chieh}, title = {ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19023-19035} }
A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Xuyi and Quan, Yuhui and Xu, Ruotao and Ji, Hui}, title = {A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12731-12741} }
Towards Precise Scaling Laws for Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Yuanyang and Zhao, Yaqi and Zheng, Mingwu and Lin, Ke and Ou, Jiarong and Chen, Rui and Huang, Victor Shea-Jay and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Yin, Baoqun and Zhang, Wentao and Gai, Kun}, title = {Towards Precise Scaling Laws for Video Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18155-18165} }
SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong}, title = {SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16871-16881} }
AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2025_CVPR, author = {Wimbauer, Felix and Chen, Weirong and Muhle, Dominik and Rupprecht, Christian and Cremers, Daniel}, title = {AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16717-16727} }
Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jinnyeong and Baek, Seung-Hwan}, title = {Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11482-11492} }
Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhaoran and Lu, Peng and Zhang, Anran and Li, Peipei and Li, Xia and Liu, Xuannan and Hu, Yang and Chen, Shiyi and Wang, Liwei and Guo, Wenhao}, title = {Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14411-14421} }
Towards Efficient Foundation Model for Zero-shot Amodal Segmentation-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaochen and Qiao, Limeng and Chu, Xiangxiang and Ma, Lin and Jiang, Tingting}, title = {Towards Efficient Foundation Model for Zero-shot Amodal Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20254--20264} }
Scaling Properties of Diffusion Models For Perceptual Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ravishankar_2025_CVPR, author = {Ravishankar, Rahul and Patel, Zeeshan and Rajasegaran, Jathushan and Malik, Jitendra}, title = {Scaling Properties of Diffusion Models For Perceptual Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12945--12954} }
Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Hao and Zhu, Yan and Xiao, Jiayu and Xiao, Tianxiang and Ma, Yike and Zhang, Yucheng and Dai, Feng}, title = {Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14036--14045} }
PolarFree: Polarization-based Reflection-Free Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Mingde and Wang, Menglu and Tam, King-Man and Li, Lingen and Xue, Tianfan and Gu, Jinwei}, title = {{PolarFree}: Polarization-based Reflection-Free Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10890--10899} }
Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Aodi and Zhuang, Liansheng and Long, Xiao and Yao, Minghong and Wang, Shafei}, title = {Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15349--15359} }
MultimodalStudio: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lincetto_2025_CVPR, author = {Lincetto, Federico and Agresti, Gianluca and Rossi, Mattia and Zanuttigh, Pietro}, title = {{MultimodalStudio}: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10964--10973} }
MuTri: Multi-view Tri-alignment for OCT to OCTA 3D Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhuangzhuang and Wang, Hualiang and Ou, Chubin and Li, Xiaomeng}, title = {{MuTri}: Multi-view Tri-alignment for {OCT} to {OCTA} {3D} Image Translation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20885--20894} }
Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Wenhao and Zhou, Mingliang and Chen, Yu and Wei, Xuekai and Feng, Yong and Pu, Huayan and Jia, Weijia}, title = {Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17990--17999} }
Pos3R: 6D Pose Estimation for Unseen Objects Made Easy-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Weijian and Campbell, Dylan and Sun, Chunyi and Zhang, Jiahao and Kanitkar, Shubham and Shaffer, Matt E. and Gould, Stephen}, title = {{Pos3R}: {6D} Pose Estimation for Unseen Objects Made Easy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16818--16828} }
RaCFormer: Towards High-Quality 3D Object Detection via Query-based Radar-Camera Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Duan, Yifan and Li, Houqiang and Zhang, Yanyong}, title = {{RaCFormer}: Towards High-Quality {3D} Object Detection via Query-based Radar-Camera Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17081--17091} }
Understanding Multi-Task Activities from Single-Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Yuhan and Elhamifar, Ehsan}, title = {Understanding Multi-Task Activities from Single-Task Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19120--19131} }
Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xinjie and Chen, Ziyi and Yu, Xinlu and Chu, Iek-Heng and Chang, Peng and Xiao, Jing}, title = {Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11384--11394} }
TransPixeler: Advancing Text-to-Video Generation with Transparency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Luozhou and Li, Yijun and Chen, Zhifei and Wang, Jui-Hsien and Zhang, Zhifei and Zhang, He and Lin, Zhe and Chen, Ying-Cong}, title = {{TransPixeler}: Advancing Text-to-Video Generation with Transparency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18229--18239} }
What's in the Image? A Deep-Dive into the Vision of Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kaduri_2025_CVPR, author = {Kaduri, Omri and Bagon, Shai and Dekel, Tali}, title = {What's in the Image? A Deep-Dive into the Vision of Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14549--14558} }
FreeSim: Toward Free-viewpoint Camera Simulation in Driving Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Lue and Zhang, Hao and Wang, Qitai and Li, Hongsheng and Zhang, Zhaoxiang}, title = {{FreeSim}: Toward Free-viewpoint Camera Simulation in Driving Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12004--12014} }
Seq2Time: Sequential Knowledge Transfer for Video LLM Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Andong and Gao, Zhongpai and Choudhuri, Anwesa and Planche, Benjamin and Zheng, Meng and Wang, Bin and Chen, Terrence and Chen, Chen and Wu, Ziyan}, title = {{Seq2Time}: Sequential Knowledge Transfer for Video {LLM} Temporal Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13766--13775} }
GPVK-VL: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yunxuan and Fan, Lei and Xing, Xiaoying and Zhou, Jianxiong and Wu, Ying}, title = {{GPVK-VL}: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16728--16738} }
Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Sihao and Yang, Yibo and Li, Xiaojie and Clifton, David A. and Ghanem, Bernard}, title = {Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20502--20511} }
GRAPHGPT-O: Synergistic Multimodal Comprehension and Generation on Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Yi and Jin, Bowen and Shen, Jiacheng and Ding, Sirui and Tan, Qiaoyu and Han, Jiawei}, title = {{GRAPHGPT-O}: Synergistic Multimodal Comprehension and Generation on Graphs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19467--19476} }
Model Poisoning Attacks to Federated Learning via Multi-Round Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yueqi and Fang, Minghong and Gong, Neil Zhenqiang}, title = {Model Poisoning Attacks to Federated Learning via Multi-Round Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15454--15463} }
TaoAvatar: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jianchuan and Hu, Jingchuan and Wang, Gaige and Jiang, Zhonghua and Zhou, Tiansong and Chen, Zhiwen and Lv, Chengfei}, title = {{TaoAvatar}: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10723--10734} }
Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Jikang and Yan, Zhiyuan and Zhang, Ying and Hao, Li and Ai, Jiaxin and Zou, Qin and Li, Chen and Wang, Zhongyuan}, title = {Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13927--13936} }
CO-SPY: Combining Semantic and Pixel Features to Detect Synthetic Images by AI-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Siyuan and Lyu, Lingjuan and Wang, Zhenting and Zhang, Xiangyu and Sehwag, Vikash}, title = {{CO-SPY}: Combining Semantic and Pixel Features to Detect Synthetic Images by {AI}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13455--13465} }
GENMANIP: LLM-driven Simulation for Generalizable Instruction-Following Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Ning and Chen, Yilun and Yang, Shuai and Chen, Xinyi and Tian, Yang and Li, Hao and Huang, Haifeng and Wang, Hanqing and Wang, Tai and Pang, Jiangmiao}, title = {{GENMANIP}: {LLM}-driven Simulation for Generalizable Instruction-Following Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12187--12198} }
Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Byung Hyun and Lim, Sungjin and Chun, Se Young}, title = {Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18596--18606} }
Camera Resection from Known Line Pencils and a Radially Distorted Scanline-
[pdf]
[bibtex]@InProceedings{Dibene_2025_CVPR, author = {Dibene, Juan C. and Dunn, Enrique}, title = {Camera Resection from Known Line Pencils and a Radially Distorted Scanline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15843--15851} }
SPC-GS: Gaussian Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Guibiao and Li, Qing and Bao, Zhenyu and Qiu, Guoping and Liu, Kanglin}, title = {{SPC-GS}: {Gaussian} Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11264--11274} }
M3amba: Memory Mamba is All You Need for Whole Slide Image Classification-
[pdf]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Tingting and Jiang, Kui and Xiao, Yi and Zhao, Sicheng and Yao, Hongxun}, title = {M3amba: Memory {Mamba} is All You Need for Whole Slide Image Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15601--15610} }
Redefining <Creative> in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Fu and Xie, Yucheng and Yang, Xu and Wang, Jing and Geng, Xin}, title = {Redefining \ensuremath{<}{Creative}\ensuremath{>} in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18444--18454} }
Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Jia and Lu, Shuai and Zhang, Weihang and Chen, Fang and Li, Huiqi and Liao, Hongen}, title = {Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20405--20415} }
Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Feifei and Zhang, Mi and Sun, Yiming and Yang, Min}, title = {Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13252--13262} }
MirrorVerse: Pushing Diffusion Models to Realistically Reflect the World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhiman_2025_CVPR, author = {Dhiman, Ankit and Shah, Manan and Babu, R Venkatesh}, title = {{MirrorVerse}: Pushing Diffusion Models to Realistically Reflect the World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11239--11249} }
EAP-GS: Efficient Augmentation of Pointcloud for 3D Gaussian Splatting in Few-shot Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Dongrui and Xing, Yuxiang}, title = {{EAP-GS}: Efficient Augmentation of Pointcloud for {3D} {Gaussian} Splatting in Few-shot Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16498--16507} }
Empowering Large Language Models with 3D Situation Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Zhihao and Peng, Yibo and Ren, Jinke and Liao, Yinghong and Han, Yatong and Feng, Chun-Mei and Zhao, Hengshuang and Li, Guanbin and Cui, Shuguang and Li, Zhen}, title = {Empowering Large Language Models with {3D} Situation Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19435--19445} }
EchoTraffic: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zhenghao and Chen, Hao and Xie, Binzhu and Xu, Jiaqi and Guo, Ziyu and Xu, Xuemiao and Hao, Jianye and Fu, Chi-Wing and Hu, Xiaowei and Heng, Pheng-Ann}, title = {{EchoTraffic}: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19098--19108} }
Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Junlong and Fu, Bin and Ye, Jin and Wang, Guoan and Li, Tianbin and Wang, Haoyu and Li, Ruoyu and Yao, He and Cheng, Junren and Li, Jingwen and Su, Yanzhou and Zhu, Min and He, Junjun}, title = {Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20841--20851} }
GigaHands: A Massive Annotated Dataset of Bimanual Hand Activities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Rao and Zhang, Dingxi and Jiang, Alex and Fu, Wanjia and Funk, Austin and Ritchie, Daniel and Sridhar, Srinath}, title = {{GigaHands}: A Massive Annotated Dataset of Bimanual Hand Activities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17461--17474} }
AutoSSVH: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lian_2025_CVPR, author = {Lian, Niu and Li, Jun and Wang, Jinpeng and Luo, Ruisheng and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin}, title = {{AutoSSVH}: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18881--18890} }
FrugalNeRF: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Chin-Yang and Wu, Chung-Ho and Yeh, Chang-Han and Yen, Shih-Han and Sun, Cheng and Liu, Yu-Lun}, title = {{FrugalNeRF}: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11227--11238} }
Pioneering 4-Bit FP Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Maosen and Chen, Pengtao and Yu, Chong and Wen, Yan and Tan, Xudong and Chen, Tao}, title = {Pioneering 4-Bit {FP} Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18134--18143} }
CompGS: Unleashing 2D Compositionality for Compositional Text-to-3D via Dynamically Optimizing 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR, author = {Ge, Chongjian and Xu, Chenfeng and Ji, Yuanfeng and Peng, Chensheng and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu and Jampani, Varun and Zhan, Wei}, title = {{CompGS}: Unleashing {2D} Compositionality for Compositional Text-to-{3D} via Dynamically Optimizing {3D} {Gaussians}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18509--18520} }
FIRE: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Beilin and Xu, Xuan and Wang, Xin and Zhang, Yufei and You, Weike and Zhou, Linna}, title = {{FIRE}: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12830--12839} }
Assessing and Learning Alignment of Unimodal Vision and Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Le and Yang, Qian and Agrawal, Aishwarya}, title = {Assessing and Learning Alignment of Unimodal Vision and Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14604--14614} }
Action Detail Matters: Refining Video Recognition with Local Action Queries-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Mengmeng and Huang, Zeyi and Kong, Xiangjie and Shen, Guojiang and Dai, Guang and Wang, Jingdong and Liu, Yong}, title = {Action Detail Matters: Refining Video Recognition with Local Action Queries}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19132--19142} }
Generative Map Priors for Collaborative BEV Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Jiahui and Gong, Yue and Wang, Luting and Zhang, Shifeng and Zhou, Xu and Liu, Si}, title = {Generative Map Priors for Collaborative {BEV} Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11919--11928} }
Coherent 3D Portrait Video Reconstruction via Triplane Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shengze and Li, Xueting and Liu, Chao and Chan, Matthew and Stengel, Michael and Fuchs, Henry and De Mello, Shalini and Nagano, Koki}, title = {Coherent {3D} Portrait Video Reconstruction via Triplane Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10712--10722} }
ManiVideo: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Youxin and Shao, Ruizhi and Zhang, Jiajun and Tu, Hanzhang and Liu, Yun and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin}, title = {{ManiVideo}: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12209--12219} }
FedCS: Coreset Selection for Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Chenhe and Xie, Weiying and Li, Daixun and Qin, Haonan and Ye, Hangyu and Fang, Leyuan and Li, Yunsong}, title = {{FedCS}: Coreset Selection for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15434--15443} }
Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Yinghui and Qu, Litao and Zhang, Shizhou and Xu, Di and Yang, Yingkun and Zhang, Yanning}, title = {Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12658--12668} }
OmniMMI: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuxuan and Wang, Yueqian and Chen, Bo and Wu, Tong and Zhao, Dongyan and Zheng, Zilong}, title = {{OmniMMI}: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18925--18935} }
SOAP: Vision-Centric 3D Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Hyo-Jun and Koh, Yeong Jun and Kim, Hanul and Kim, Hyunseop and Lee, Yonguk and Lee, Jinu}, title = {{SOAP}: Vision-Centric {3D} Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17145--17154} }
SimVS: Simulating World Inconsistencies for Robust View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Trevithick_2025_CVPR, author = {Trevithick, Alex and Paiss, Roni and Henzler, Philipp and Verbin, Dor and Wu, Rundi and Alzayer, Hadi and Gao, Ruiqi and Poole, Ben and Barron, Jonathan T. and Holynski, Aleksander and Ramamoorthi, Ravi and Srinivasan, Pratul P.}, title = {{SimVS}: Simulating World Inconsistencies for Robust View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16464--16474} }
From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Chen and Chen, Zhizhou and Xu, Yunzhe and Gu, Enxuan and Li, Jian and Yi, Zili and Wang, Qian and Yang, Jian and Tai, Ying}, title = {From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17935--17946} }
COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sanghwan and Xiao, Rui and Georgescu, Mariana-Iuliana and Alaniz, Stephan and Akata, Zeynep}, title = {{COSMOS}: Cross-Modality Self-Distillation for Vision Language Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14690--14700} }
Lifting Motion to the 3D World via 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiaman and Liu, C. Karen and Wu, Jiajun}, title = {Lifting Motion to the {3D} World via {2D} Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17518--17528} }
TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xin and Chen, Kai and Zhang, Jiaming and Chen, Jingjing and Ma, Xingjun}, title = {{TAPT}: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19910--19920} }
Active Data Curation Effectively Distills Large-Scale Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Udandarao_2025_CVPR, author = {Udandarao, Vishaal and Parthasarathy, Nikhil and Naeem, Muhammad Ferjad and Evans, Talfan and Albanie, Samuel and Tombari, Federico and Xian, Yongqin and Tonioni, Alessio and Henaff, Olivier J.}, title = {Active Data Curation Effectively Distills Large-Scale Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14422--14437} }
SCSA: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shang_2025_CVPR, author = {Shang, Chunnan and Wang, Zhizhong and Wang, Hongwei and Meng, Xiangming}, title = {{SCSA}: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13051--13060} }
Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tianyi and Wang, Zichen and Wang, Cong and Shu, Yuanchao and Deng, Ruilong and Cheng, Peng and Chen, Jiming}, title = {Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19230--19240} }
SAM2Object: Consolidating View Consistency via SAM2 for Zero-Shot 3D Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jihuai and Zhuo, Junbao and Chen, Jiansheng and Ma, Huimin}, title = {{SAM2Object}: Consolidating View Consistency via {SAM2} for Zero-Shot {3D} Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19325--19334} }
CDI: Copyrighted Data Identification in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dubinski_2025_CVPR, author = {Dubi{\'n}ski, Jan and Kowalczuk, Antoni and Boenisch, Franziska and Dziedzic, Adam}, title = {{CDI}: Copyrighted Data Identification in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18674--18684} }
CRISP: Object Pose and Shape Estimation with Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Jingnan and Talak, Rajat and Zhang, Harry and Jin, David and Carlone, Luca}, title = {{CRISP}: Object Pose and Shape Estimation with Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11644--11653} }
Creating Your Editable 3D Photorealistic Avatar with Tetrahedron-constrained Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui}, title = {Creating Your Editable {3D} Photorealistic Avatar with Tetrahedron-constrained {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15976--15986} }
Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahimi_2025_CVPR, author = {Rahimi, Ahmad and Luan, Po-Chien and Liu, Yuejiang and Raji{\v{c}}, Frano and Alahi, Alexandre}, title = {Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17271--17281} }
Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Juntae and Hayat, Munawar and Yun, Sungrack}, title = {Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15329--15338} }
PerLA: Perceptive 3D Language Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Poiesi, Fabio and Wang, Yiming}, title = {PerLA: Perceptive 3D Language Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14369-14379} }
PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Qiyao and Yin, Xiangyu and Yang, Boyuan and Gao, Wei}, title = {PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18826-18836} }
Mask^2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Tianhao and Yuan, Jianlong and Feng, Wanquan and Fang, Shancheng and Liu, Jiawei and Zhou, SiYu and He, Qian and Xie, Hongtao and Zhang, Yongdong}, title = {Mask{\textasciicircum}2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18837-18846} }
JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Xiaoyong and Du, Songlin}, title = {JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14934-14943} }
DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Keda and Qin, Can and You, Haoxuan and Sui, Yang and Wang, Huan}, title = {DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18992-19001} }
MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gabeff_2025_CVPR, author = {Gabeff, Valentin and Qi, Haozhe and Flaherty, Brendan and Sumbul, Gencer and Mathis, Alexander and Tuia, Devis}, title = {MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13854-13864} }
Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Yu, Guo and Wang, Fei}, title = {Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15885-15895} }
SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Chi and Ma, Xiaoxuan and Su, Jiajun and Wang, Yizhou}, title = {SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16796-16806} }
UniScene: Unified Occupancy-centric Driving Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Bohan and Guo, Jiazhe and Liu, Hongsi and Zou, Yingshuang and Ding, Yikang and Chen, Xiwu and Zhu, Hu and Tan, Feiyang and Zhang, Chi and Wang, Tiancai and Zhou, Shuchang and Zhang, Li and Qi, Xiaojuan and Zhao, Hao and Yang, Mu and Zeng, Wenjun and Jin, Xin}, title = {UniScene: Unified Occupancy-centric Driving Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11971-11981} }
Learning from Streaming Video with Orthogonal Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Tengda and Gokay, Dilara and Heyward, Joseph and Zhang, Chuhan and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Damen, Dima and Zisserman, Andrew}, title = {Learning from Streaming Video with Orthogonal Gradients}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13651-13660} }
Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers-
[pdf]
[supp]
[bibtex]@InProceedings{Guimard_2025_CVPR, author = {Guimard, Quentin and D'Inc\`a, Moreno and Mancini, Massimiliano and Ricci, Elisa}, title = {Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15151-15161} }
An Image-like Diffusion Method for Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hui_2025_CVPR, author = {Hui, Xiaofei and Qu, Haoxuan and Rahmani, Hossein and Liu, Jun}, title = {An Image-like Diffusion Method for Human-Object Interaction Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14002-14012} }
COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiaxin and Jiang, Junjun and Chen, Youyu and Jiang, Kui and Liu, Xianming}, title = {COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19335-19344} }
PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR, author = {Cho, Dong Kyu and Hwang, Inwoo and Lee, Sanghack}, title = {PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15360-15370} }
Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi}, title = {Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20492-20501} }
VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chi-Pin and Wu, Yen-Siang and Chung, Hung-Kai and Chang, Kai-Po and Yang, Fu-En and Wang, Yu-Chiang Frank}, title = {VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17603-17612} }
Compositional Caching for Training-free Open-vocabulary Attribute Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garosi_2025_CVPR, author = {Garosi, Marco and Conti, Alessandro and Liu, Gaowen and Ricci, Elisa and Mancini, Massimiliano}, title = {Compositional Caching for Training-free Open-vocabulary Attribute Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15098-15107} }
VI^3NR: Variance Informed Initialization for Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Koneputugodage_2025_CVPR, author = {Koneputugodage, Chamin Hewa and Ben-Shabat, Yizhak and Ramasinghe, Sameera and Gould, Stephen}, title = {VI{\textasciicircum}3NR: Variance Informed Initialization for Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13477-13486} }
M-LLM Based Video Frame Selection for Efficient Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Kai and Gao, Feng and Nie, Xiaohan and Zhou, Peng and Tran, Son and Neiman, Tal and Wang, Lingyun and Shah, Mubarak and Hamid, Raffay and Yin, Bing and Chilimbi, Trishul}, title = {M-LLM Based Video Frame Selection for Efficient Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13702-13712} }
Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sidhu_2025_CVPR, author = {Sidhu, Mankeerat and Chopra, Hetarth and Blume, Ansel and Kim, Jeonghwan and Reddy, Revanth Gangi and Ji, Heng}, title = {Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15129-15138} }
Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhuoman and Ye, Weicai and Luximon, Yan and Wan, Pengfei and Zhang, Di}, title = {Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11016-11025} }
Diffusion Model is Effectively Its Own Teacher-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Xinyin and Yu, Runpeng and Liu, Songhua and Fang, Gongfan and Wang, Xinchao}, title = {Diffusion Model is Effectively Its Own Teacher}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12901-12911} }
UnCommon Objects in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xingchen and Tayal, Piyush and Wang, Jianyuan and Zarzar, Jesus and Monnier, Tom and Tertikas, Konstantinos and Duan, Jiali and Toisoul, Antoine and Zhang, Jason Y. and Neverova, Natalia and Vedaldi, Andrea and Shapovalov, Roman and Novotny, David}, title = {UnCommon Objects in 3D}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14102-14113} }
Learning Textual Prompts for Open-World Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yuxin and Cui, Junbiao and Liang, Jiye}, title = {Learning Textual Prompts for Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14756-14765} }
LongDiff: Training-Free Long Video Generation in One Go-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhuoling and Rahmani, Hossein and Ke, Qiuhong and Liu, Jun}, title = {LongDiff: Training-Free Long Video Generation in One Go}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17789-17798} }
Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yongkang and Cheng, Tianheng and Feng, Bin and Liu, Wenyu and Wang, Xinggang}, title = {Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14998-15008} }
MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhiyuan and Li, Xiaofan and Xu, Zhihao and Peng, Wenjie and Zhou, Zijian and Shi, Miaojing and Huang, Shuangping}, title = {MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12089-12099} }
ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bu_2025_CVPR, author = {Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi}, title = {ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12999-13008} }
Masked Point-Entity Contrast for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yan and Jia, Baoxiong and Zhu, Ziyu and Huang, Siyuan}, title = {Masked Point-Entity Contrast for Open-Vocabulary 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14125-14136} }
On the Generalization of Handwritten Text Recognition Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garrido-Munoz_2025_CVPR, author = {Garrido-Munoz, Carlos and Calvo-Zaragoza, Jorge}, title = {On the Generalization of Handwritten Text Recognition Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15275-15286} }
InsTaG: Learning Personalized 3D Talking Head from Few-Second Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiahe and Zhang, Jiawei and Bai, Xiao and Zheng, Jin and Zhou, Jun and Gu, Lin}, title = {InsTaG: Learning Personalized 3D Talking Head from Few-Second Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10690-10700} }
Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Fan and Wu, Wei and Zheng, Kecheng and Ma, Shuailei and Gong, Biao and Liu, Jiawei and Zhai, Wei and Cao, Yang and Shen, Yujun and Zha, Zheng-Jun}, title = {Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19618-19627} }
Rotation-Equivariant Self-Supervised Method in Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Hanze and Fu, Jiahong and Xie, Qi and Meng, Deyu}, title = {Rotation-Equivariant Self-Supervised Method in Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12720-12730} }
FlashSloth : Lightning Multimodal Large Language Models via Embedded Visual Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_CVPR, author = {Tong, Bo and Lai, Bokai and Zhou, Yiyi and Luo, Gen and Shen, Yunhang and Li, Ke and Sun, Xiaoshuai and Ji, Rongrong}, title = {FlashSloth : Lightning Multimodal Large Language Models via Embedded Visual Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14570-14581} }
T2SG: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Changsheng and Qi, Mengshi and Liu, Liang and Ma, Huadong}, title = {T2SG: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17197-17206} }
RealEdit: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sushko_2025_CVPR, author = {Sushko, Peter and Bharadwaj, Ayana and Lim, Zhi Yang and Ilin, Vasily and Caffee, Ben and Chen, Dongping and Salehi, Mohammadreza and Hsieh, Cheng-Yu and Krishna, Ranjay}, title = {RealEdit: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13403-13413} }
VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanyang and Liu, Fangfu and Chi, Jiawei and Duan, Yueqi}, title = {VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16475-16485} }
3D-HGS: 3D Half-Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haolin and Liu, Jinyang and Sznaier, Mario and Camps, Octavia}, title = {3D-HGS: 3D Half-Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10996-11005} }
Scale Efficient Training for Large Datasets-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Qing and Gao, Junyu and Wang, Qi}, title = {Scale Efficient Training for Large Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20458-20467} }
Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal-
[pdf]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Haonan and Hua, Guang and Fang, Zhengru and Xu, Guowen and Rahardja, Susanto and Fang, Yuguang}, title = {Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13424-13433} }
Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Xinyu and Xie, Jun and Chen, Shengzhe and Liu, Jun}, title = {Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14068-14077} }
Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Huang, Yu and Wang, Yaoming and Shen, Wei}, title = {Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15009-15020} }
Relative Pose Estimation through Affine Corrections of Monocular Depth Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Yifan and Liu, Shaohui and Pautrat, R\'emi and Pollefeys, Marc and Larsson, Viktor}, title = {Relative Pose Estimation through Affine Corrections of Monocular Depth Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16706-16716} }
Zero-1-to-A: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zhenglin and Ma, Fan and Fan, Hehe and Chua, Tat-Seng}, title = {Zero-1-to-A: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15941-15952} }
Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World 3D Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Khanh and Hassan, Ghulam Mubashar and Mian, Ajmal}, title = {Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World 3D Object Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16965-16975} }
Conical Visual Concentration for Efficient Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Long and Huang, Qidong and Dong, Xiaoyi and Lu, Jiajie and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi and Wu, Feng and Lin, Dahua}, title = {Conical Visual Concentration for Efficient Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14593-14603} }
Foundations of the Theory of Performance-Based Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Pierard_2025_CVPR, author = {Pi\'erard, S\'ebastien and Halin, Ana{\"\i}s and Cioppa, Anthony and Deliege, Adrien and Van Droogenbroeck, Marc}, title = {Foundations of the Theory of Performance-Based Ranking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14293-14302} }
BIGS: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{On_2025_CVPR, author = {On, Jeongwan and Gwak, Kyeonghwan and Kang, Gunyoung and Cha, Junuk and Hwang, Soohyun and Hwang, Hyein and Baek, Seungryul}, title = {BIGS: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17437-17447} }
Frequency-Biased Synergistic Design for Image Compression and Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiaming and Zheng, Qi and Liu, Zihao and Zhong, Yilian and Liu, Peiye and Liu, Tao and Xu, Shusong and Lu, Yanheng and Li, Sicheng and Niu, Dimin and Fan, Yibo}, title = {Frequency-Biased Synergistic Design for Image Compression and Compensation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12820-12829} }
Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Cheng and Choe, Jaesung and Loop, Charles and Ma, Wei-Chiu and Wang, Yu-Chiang Frank}, title = {Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16187-16196} }
MambaIC: State Space Models for High-Performance Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Fanhu and Tang, Hao and Shao, Yihua and Chen, Siyu and Shao, Ling and Wang, Yan}, title = {MambaIC: State Space Models for High-Performance Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18041-18050} }
Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Jinbo and Peng, Rui and Wang, Zhiyan and Tang, Luyang and Yang, Jiayu and Liang, Jie and Wu, Jiahao and Wang, Ronggang}, title = {Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16520-16531} }
Locality-Aware Zero-Shot Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Sanghyun and Jung, Deunsol and Cho, Minsu}, title = {Locality-Aware Zero-Shot Human-Object Interaction Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20190-20200} }
Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Yu and Ju, Yuanchen and Wei, Tianming and Chu, Chi and Wong, Lawson L.S. and Xu, Huazhe}, title = {Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17383-17393} }
SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Xiyue and Hu, Jiarui and Hu, Junjie and Bao, Hujun and Zhang, Guofeng}, title = {SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11929-11938} }
Random Conditioning for Diffusion Model Compression with Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Dohyun and Park, Sehwan and Han, Geonhee and Kim, Seung Wook and Seo, Paul Hongsuck}, title = {Random Conditioning for Diffusion Model Compression with Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18607-18618} }
Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Dong, Weisheng and Wu, Fangfang and Wang, Yaonan and Mian, Ajmal}, title = {Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11104-11114} }
Heterogeneous Skeleton-Based Action Representation Learning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hongsong and Ma, Xiaoyan and Kuang, Jidong and Gui, Jie}, title = {Heterogeneous Skeleton-Based Action Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19154-19164} }
AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Cin_2025_CVPR, author = {Cin, Andrea Porfiri Dal and Dikov, Georgi and Ju, Jihong and Ghafoorian, Mohsen}, title = {AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16674-16684} }
Language Guided Concept Bottleneck Models for Interpretable Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Lu and Han, Haoyu and Tao, Zhe and Yao, Hantao and Xu, Changsheng}, title = {Language Guided Concept Bottleneck Models for Interpretable Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14976-14986} }
Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model-
[pdf]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Yingying and Yang, Quanwei and Wang, Kaisiyuan and Zhou, Hang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wu, Yu and Wang, Jingdong}, title = {Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17550-17560} }
Odd-One-Out: Anomaly Detection by Comparing with Neighbors-
[pdf]
[supp]
[bibtex]@InProceedings{Bhunia_2025_CVPR, author = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan}, title = {Odd-One-Out: Anomaly Detection by Comparing with Neighbors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20395-20404} }
D^3CTTA: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Jichun and Jiang, Haiyong and Song, Haoxuan and Xiao, Jun and Gong, Dong}, title = {D{\textasciicircum}3CTTA: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11864-11874} }
A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Kai and Shi, Mingjia and Zhou, Yukun and Li, Zekai and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Zhang, Hanwang and You, Yang}, title = {A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12934-12944} }
Empowering LLMs to Understand and Generate Complex Vector Graphics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Ximing and Hu, Juncheng and Liang, Guotao and Zhang, Jing and Xu, Dong and Yu, Qian}, title = {Empowering LLMs to Understand and Generate Complex Vector Graphics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19487-19497} }
PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2025_CVPR, author = {Zhai, Hongjia and Li, Hai and Li, Zhenzhe and Pan, Xiaokun and He, Yijia and Zhang, Guofeng}, title = {PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14114-14124} }
MLVU: Benchmarking Multi-task Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Shu, Yan and Zhao, Bo and Wu, Boya and Liang, Zhengyang and Xiao, Shitao and Qin, Minghao and Yang, Xi and Xiong, Yongping and Zhang, Bo and Huang, Tiejun and Liu, Zheng}, title = {MLVU: Benchmarking Multi-task Long Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13691-13701} }
Recovering Dynamic 3D Sketches from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jaeah and Choi, Changwoon and Kim, Young Min and Park, Jaesik}, title = {Recovering Dynamic 3D Sketches from Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12423-12432} }
EigenGS Representation: From Eigenspace to Gaussian Image Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tai_2025_CVPR, author = {Tai, Lo-Wei and Li, Ching-En and Chen, Cheng-Lin and Tsai, Chih-Jung and Chen, Hwann-Tzong and Liu, Tyng-Luh}, title = {EigenGS Representation: From Eigenspace to Gaussian Image Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13487-13496} }
MaSS13K: A Matting-level Semantic Segmentation Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Chenxi and Li, Minghan and Zeng, Hui and Luo, Jun and Zhang, Lei}, title = {MaSS13K: A Matting-level Semantic Segmentation Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14046-14056} }
Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Wei and Chen, Yufei and Yue, Xiaodong}, title = {Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15508-15517} }
ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Heng and Ren, Yuqiang and Yan, Ke and Ding, Shouhong and Hao, Yongtao}, title = {ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14358-14368} }
nnWNet: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yanfeng and Li, Lingrui and Lu, Le and Xu, Minfeng}, title = {nnWNet: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20852-20862} }
VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saravanan_2025_CVPR, author = {Saravanan, Darshana and Gupta, Varun and Singh, Darshan and Khan, Zeeshan and Gandhi, Vineet and Tapaswi, Makarand}, title = {VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18914-18924} }
Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label 3D Scene Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Daizong and Hu, Wei}, title = {Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label 3D Scene Attacks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11886-11897} }
Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning (PEFT) in Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2025_CVPR, author = {Mai, Zheda and Zhang, Ping and Tu, Cheng-Hao and Chen, Hong-You and Nguyen, Quang-Huy and Zhang, Li and Chao, Wei-Lun}, title = {Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning (PEFT) in Visual Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14845-14857} }
Pippo: High-Resolution Multi-View Humans from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kant_2025_CVPR, author = {Kant, Yash and Weber, Ethan and Kim, Jin Kyu and Khirodkar, Rawal and Zhaoen, Su and Martinez, Julieta and Gilitschenski, Igor and Saito, Shunsuke and Bagautdinov, Timur}, title = {Pippo: High-Resolution Multi-View Humans from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16418-16429} }
H2ST: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhang and Zhao, Wenjie and Guo, Yunhui}, title = {H2ST: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15413-15423} }
MoVE-KD: Knowledge Distillation for VLMs with Mixture of Visual Encoders-
[pdf]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Jiajun and Zhang, Yuan and Huang, Tao and Lu, Ming and Zhang, Qizhe and An, Ruichuan and Ma, Ningning and Zhang, Shanghang}, title = {MoVE-KD: Knowledge Distillation for VLMs with Mixture of Visual Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19846-19856} }
CamFreeDiff: Camera-free Image to Panorama Generation with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Xiaoding and Tang, Shitao and Li, Kejie and Wang, Peng}, title = {CamFreeDiff: Camera-free Image to Panorama Generation with Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16408-16417} }
Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yuxuan and Wang, Haoxuan and Ling, Pengyang and Wei, Zhixiang and Chen, Huaian and Jin, Yi and Chen, Enhong}, title = {Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16071-16080} }
FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs-
[pdf]
[supp]
[bibtex]@InProceedings{Asokan_2025_CVPR, author = {Asokan, Mothilal and Wu, Kebin and Albreiki, Fatima}, title = {FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14495-14504} }
Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR, author = {Ge, Yuying and Li, Yizhuo and Ge, Yixiao and Shan, Ying}, title = {Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13606-13617} }
Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiacong and Lo, Shao-Yuan and Safaei, Bardia and Patel, Vishal M. and Dwivedi, Isht}, title = {Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20370-20382} }
Video-Guided Foley Sound Generation with Multimodal Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ziyang and Seetharaman, Prem and Russell, Bryan and Nieto, Oriol and Bourgin, David and Owens, Andrew and Salamon, Justin}, title = {Video-Guided Foley Sound Generation with Multimodal Controls}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18770-18781} }
F^3OCUS - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics-
[pdf]
[supp]
[bibtex]@InProceedings{Saha_2025_CVPR, author = {Saha, Pramit and Wagner, Felix and Mishra, Divyanshu and Peng, Can and Thakur, Anshul and Clifton, David A. and Kamnitsas, Konstantinos and Noble, J. Alison}, title = {F{\textasciicircum}3OCUS - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20006-20017} }
3D Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oh_2025_CVPR, author = {Oh, Gyeongrok and Kim, Sungjune and Ko, Heeju and Chi, Hyung-gun and Kim, Jinkyu and Lee, Dongwook and Ji, Daehyun and Choi, Sungjoon and Jang, Sujin and Kim, Sangpil}, title = {3D Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17134-17144} }
Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Yuan-Hong and Mahmood, Rafid and Fidler, Sanja and Acuna, David}, title = {Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14667-14678} }
g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zihan and Lee, Gim Hee}, title = {g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14191-14202} }
UniReal: Universal Image Generation and Editing via Learning Real-world Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xi and Zhang, Zhifei and Zhang, He and Zhou, Yuqian and Kim, Soo Ye and Liu, Qing and Li, Yijun and Zhang, Jianming and Zhao, Nanxuan and Wang, Yilin and Ding, Hui and Lin, Zhe and Zhao, Hengshuang}, title = {UniReal: Universal Image Generation and Editing via Learning Real-world Dynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12501-12511} }
Exploring Contextual Attribute Density in Referring Expression Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhicheng and Pan, Zhiyu and Peng, Zhan and Cheng, Jian and Xiao, Liwen and Jiang, Wei and Cao, Zhiguo}, title = {Exploring Contextual Attribute Density in Referring Expression Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19587-19596} }
SegMAN: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Yunxiang and Lou, Meng and Yu, Yizhou}, title = {SegMAN: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19077-19087} }
OmniFlow: Any-to-Any Generation with Multi-Modal Rectified Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Liao, Zichun and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya}, title = {OmniFlow: Any-to-Any Generation with Multi-Modal Rectified Flows}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13178-13188} }
SLAM3R: Real-Time Dense Scene Reconstruction from Monocular RGB Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuzheng and Dong, Siyan and Wang, Shuzhe and Yin, Yingda and Yang, Yanchao and Fan, Qingnan and Chen, Baoquan}, title = {SLAM3R: Real-Time Dense Scene Reconstruction from Monocular RGB Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16651-16662} }
SemGeoMo: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2025_CVPR, author = {Cong, Peishan and Wang, Ziyi and Ma, Yuexin and Yue, Xiangyu}, title = {SemGeoMo: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17561-17570} }
Detecting Open World Objects via Partial Attribute Assignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Muli and Goenawan, Gabriel James and Qin, Huaiyuan and Han, Kai and Peng, Xi and Yang, Yanhua and Zhu, Hongyuan}, title = {Detecting Open World Objects via Partial Attribute Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20318-20328} }
Neural Inverse Rendering from Propagating Light-
[pdf]
[supp]
[bibtex]@InProceedings{Malik_2025_CVPR, author = {Malik, Anagh and Attal, Benjamin and Xie, Andrew and O'Toole, Matthew and Lindell, David B.}, title = {Neural Inverse Rendering from Propagating Light}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10534-10544} }
DecoupledGaussian: Object-Scene Decoupling for Physics-Based Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Miaowei and Zhang, Yibo and Xu, Weiwei and Ma, Rui and Zou, Changqing and Morris, Daniel}, title = {DecoupledGaussian: Object-Scene Decoupling for Physics-Based Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11361-11372} }
DashGaussian: Optimizing 3D Gaussian Splatting in 200 Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Youyu and Jiang, Junjun and Jiang, Kui and Tang, Xiao and Li, Zhihao and Liu, Xianming and Nie, Yinyu}, title = {DashGaussian: Optimizing 3D Gaussian Splatting in 200 Seconds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11146-11155} }
PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhouib_2025_CVPR, author = {Dhouib, Mohamed and Buscaldi, Davide and Vanier, Sonia and Shabou, Aymen}, title = {PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14582-14592} }
R-SCoRe: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Xudong and Wang, Fangjinhua and Galliani, Silvano and Vogel, Christoph and Pollefeys, Marc}, title = {R-SCoRe: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11536-11546} }
Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zihao and Wu, Aming and Han, Yahong}, title = {Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14225-14234} }
OmniSplat: Taming Feed-Forward 3D Gaussian Splatting for Omnidirectional Images with Editable Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Suyoung and Chung, Jaeyoung and Kim, Kihoon and Huh, Jaeyoo and Lee, Gunhee and Lee, Minsoo and Lee, Kyoung Mu}, title = {OmniSplat: Taming Feed-Forward 3D Gaussian Splatting for Omnidirectional Images with Editable Capabilities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16356-16365} }
UniVAD: A Training-free Unified Model for Few-shot Visual Anomaly Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Zhaopeng and Zhu, Bingke and Zhu, Guibo and Chen, Yingying and Tang, Ming and Wang, Jinqiao}, title = {UniVAD: A Training-free Unified Model for Few-shot Visual Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15194-15203} }
Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Hang and Luo, Lei and Qian, Jianjun and Yan, Mengkai and Chen, Shuo and Yang, Jian}, title = {Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10858-10867} }
Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for RGBD Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Jamal_2025_CVPR, author = {Jamal, Muhammad Abdullah and Mohareri, Omid}, title = {Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for RGBD Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17947-17957} }
Font-Agent: Enhancing Font Understanding with Large Language Models-
[pdf]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Yingxin and Xu, Cuijie and Shi, Haitian and Yang, Guoqing and Li, Xiaoning and Luo, Zhiming and Li, Shaozi}, title = {Font-Agent: Enhancing Font Understanding with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19670-19680} }
Secret Lies in Color: Enhancing AI-Generated Images Detection with Color Distribution Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Duan, Xiaoyue and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie}, title = {Secret Lies in Color: Enhancing AI-Generated Images Detection with Color Distribution Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13445-13454} }
Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jinlong and Saltori, Cristiano and Poiesi, Fabio and Sebe, Nicu}, title = {Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19390-19400} }
SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Hao and Wu, Bo and Lu, Yan and Mao, Zhendong}, title = {SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13798-13809} }
Mixture of Submodules for Domain Adaptive Person Search-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Minsu and Kim, Seungryong and Sohn, Kwanghoon}, title = {Mixture of Submodules for Domain Adaptive Person Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13990-14001} }
SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2025_CVPR, author = {Pham, Duc-Hai and Do, Tung and Nguyen, Phong and Hua, Binh-Son and Nguyen, Khoi and Nguyen, Rang}, title = {SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17060-17069} }
EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Shuoyan and Li, Feng and Tang, Shengeng and Zhao, Yao and Bai, Huihui}, title = {EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17755-17766} }
Seeing A 3D World in A Grain of Sand-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yufan and Ji, Yu and Guo, Yu and Ye, Jinwei}, title = {Seeing A 3D World in A Grain of Sand}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11187-11196} }
MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Yuxiang and Yan, Qi and Wang, Lele and Li, Ke and Liao, Renjie}, title = {MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17282-17293} }
Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yuanmin and Zhang, Jue and Qin, Xiaoting and Yu, Jing and Gou, Gaopeng and Xiong, Gang and Lin, Qingwei and Rajmohan, Saravan and Zhang, Dongmei and Wu, Qi}, title = {Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14400-14410} }
UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Wenbo and Wei, Fangyun and Zhou, Lei and Chen, Xi and Luo, Lin and Yi, Xiaohan and Zhang, Yizhong and Liang, Yaobo and Xu, Chang and Lu, Yan and Yang, Jiaolong and Guo, Baining}, title = {UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12199-12208} }
Apollo: An Exploration of Video Understanding in Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zohar_2025_CVPR, author = {Zohar, Orr and Wang, Xiaohan and Dubois, Yann and Mehta, Nikhil and Xiao, Tong and Hansen-Estruch, Philippe and Yu, Licheng and Wang, Xiaofang and Juefei-Xu, Felix and Zhang, Ning and Yeung-Levy, Serena and Xia, Xide}, title = {Apollo: An Exploration of Video Understanding in Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18891-18901} }
Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shihan and Zhang, Ji and Zeng, Pengpeng and Gao, Lianli and Song, Jingkuan and Shen, Heng Tao}, title = {Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14723-14732} }
PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Qihan and Chan, Long and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Song, Jie}, title = {PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18369-18378} }
MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhengqi and Tucker, Richard and Cole, Forrester and Wang, Qianqian and Jin, Linyi and Ye, Vickie and Kanazawa, Angjoo and Holynski, Aleksander and Snavely, Noah}, title = {MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10486-10496} }
Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Nannan and Li, Yijiang and Du, Dong and Chong, Zheng and Sun, Zhengwentai and Zeng, Jianhao and Dai, Yusheng and Xie, Zhengyu and Zhu, Hairui and Han, Xiaoguang}, title = {Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16029-16039} }
Identity-Preserving Text-to-Video Generation by Frequency Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Shenghai and Huang, Jinfa and He, Xianyi and Ge, Yunyang and Shi, Yujun and Chen, Liuhan and Luo, Jiebo and Yuan, Li}, title = {Identity-Preserving Text-to-Video Generation by Frequency Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12978-12988} }
FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jinxi and Song, Ziyang and Zhou, Siyuan and Yang, Bo}, title = {FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12433-12443} }
MOS: Modeling Object-Scene Associations in Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Zhengyuan and Ma, Jinpeng and Sun, Zhimin and Yi, Ran and Song, Haichuan and Tan, Xin and Ma, Lizhuang}, title = {MOS: Modeling Object-Scene Associations in Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15118-15128} }
Test-time Augmentation Improves Efficiency in Conformal Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shanmugam_2025_CVPR, author = {Shanmugam, Divya and Lu, Helen and Sankaranarayanan, Swami and Guttag, John}, title = {Test-time Augmentation Improves Efficiency in Conformal Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20622-20631} }
StoryGPT-V: Large Language Models as Consistent Story Visualizers-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Xiaoqian and Elhoseiny, Mohamed}, title = {StoryGPT-V: Large Language Models as Consistent Story Visualizers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13273-13283} }
Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning-
[pdf]
[bibtex]@InProceedings{Hadji_2025_CVPR, author = {Hadji, Isma and Noroozi, Mehdi and Escorcia, Victor and Zaganidis, Anestis and Martinez, Brais and Tzimiropoulos, Georgios}, title = {Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12789-12798} }
INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yongming and Zhang, Longhao and Rong, Zhengkun and Hu, Tianshu and Liang, Shuang and Ge, Zhipeng}, title = {INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10667-10677} }
EVPGS: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiahe and Wang, Feiyu and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Liu, Ting}, title = {EVPGS: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16398-16407} }
GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Yawen and Zhai, Wei and Yang, Yuhang and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun}, title = {GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17326-17336} }
Adapting Pre-trained 3D Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Baixuan and Zha, Yaohua and Dai, Tao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao}, title = {Adapting Pre-trained 3D Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12413-12422} }
MASH-VLM: Mitigating Action-Scene Hallucination in Video-LLMs through Disentangled Spatial-Temporal Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Bae_2025_CVPR, author = {Bae, Kyungho and Kim, Jinhyung and Lee, Sihaeng and Lee, Soonyoung and Lee, Gunhee and Choi, Jinwoo}, title = {MASH-VLM: Mitigating Action-Scene Hallucination in Video-LLMs through Disentangled Spatial-Temporal Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13744-13753} }
UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, Fran{\c{c}}ois and Jones, Michael Jeffrey and Chatterjee, Moitreya}, title = {UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13561-13570} }
Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jing and Zhou, Yuhang and Qian, Shengyi and He, Zhongmou and Zhao, Tong and Shah, Neil and Koutra, Danai}, title = {Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14215-14224} }
FirePlace: Geometric Refinements of LLM Common Sense Reasoning for 3D Object Placement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Ian and Bao, Yanan and Truong, Karen and Zhou, Howard and Schmid, Cordelia and Guibas, Leonidas and Fathi, Alireza}, title = {FirePlace: Geometric Refinements of LLM Common Sense Reasoning for 3D Object Placement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13466-13476} }
End-to-End Implicit Neural Representations for Classification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gielisse_2025_CVPR, author = {Gielisse, Alexander and van Gemert, Jan}, title = {End-to-End Implicit Neural Representations for Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18728-18737} }
UNICL-SAM: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2025_CVPR, author = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Gong, Tao and Liu, Bin and Han, Jing and Tu, Wenbin and Xu, Shengwei and Yu, Nenghai}, title = {UNICL-SAM: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20201-20211} }
Layered Motion Fusion: Lifting Motion Segmentation to 3D in Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Tschernezki_2025_CVPR, author = {Tschernezki, Vadim and Larlus, Diane and Laina, Iro and Vedaldi, Andrea}, title = {Layered Motion Fusion: Lifting Motion Segmentation to 3D in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17637-17648} }
Diffusion Self-Distillation for Zero-Shot Customized Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Shengqu and Chan, Eric Ryan and Zhang, Yunzhi and Guibas, Leonidas and Wu, Jiajun and Wetzstein, Gordon}, title = {Diffusion Self-Distillation for Zero-Shot Customized Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18434-18443} }
Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Leheng and You, Weiyi and Shi, Kexuan and Gu, Shuhang}, title = {Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17980-17989} }
Towards Human-Understandable Multi-Dimensional Concept Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Grobrugge_2025_CVPR, author = {Grobr{\"u}gge, Arne and K{\"u}hl, Niklas and Satzger, Gerhard and Spitzer, Philipp}, title = {Towards Human-Understandable Multi-Dimensional Concept Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20018-20027} }
ConText-CIR: Learning from Concepts in Text for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Eric and Kolouju, Pranavi and Pless, Robert and Stylianou, Abby and Jacobs, Nathan}, title = {ConText-CIR: Learning from Concepts in Text for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19638-19648} }
Perturb-and-Revise: Flexible 3D Editing with Generative Trajectories-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Susung and Karras, Johanna and Martin-Brualla, Ricardo and Kemelmacher-Shlizerman, Ira}, title = {Perturb-and-Revise: Flexible 3D Editing with Generative Trajectories}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16293-16303} }
Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Yushuai and Zhou, Zikun and Jiang, Dongmei and Wang, Yaowei and Yu, Jun and Lu, Guangming and Pei, Wenjie}, title = {Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15255-15264} }
LoRACLR: Contrastive Adaptation for Customization of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Simsar_2025_CVPR, author = {Simsar, Enis and Hofmann, Thomas and Tombari, Federico and Yanardag, Pinar}, title = {LoRACLR: Contrastive Adaptation for Customization of Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13189-13198} }
Opportunistic Single-Photon Time of Flight-
[pdf]
[supp]
[bibtex]@InProceedings{Nousias_2025_CVPR, author = {Nousias, Sotiris and Wei, Mian and Xiao, Howard and Wu, Maxx and Athar, Shahmeer and Wang, Kevin J. and Malik, Anagh and Barmherzig, David A. and Lindell, David B. and Kutulakos, Kyros N.}, title = {Opportunistic Single-Photon Time of Flight}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15852-15862} }
Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2025_CVPR, author = {Man, Yunze and Huang, De-An and Liu, Guilin and Sheng, Shiwei and Liu, Shilong and Gui, Liang-Yan and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding}, title = {Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14268-14280} }
Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jungin and Lee, Jiyoung and Sohn, Kwanghoon}, title = {Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13661-13670} }
Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Tongtong and Wang, Chengyu and Liu, Bingyan and Huang, Jun and Lu, Dongming}, title = {Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18209-18218} }
Retrieving Semantics from the Deep: an RAG Solution for Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2025_CVPR, author = {Mughal, M. Hamza and Dabral, Rishabh and Scholman, Merel C. J. and Demberg, Vera and Theobalt, Christian}, title = {Retrieving Semantics from the Deep: an RAG Solution for Gesture Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16578-16588} }
Improving Personalized Search with Regularized Low-Rank Parameter Updates-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2025_CVPR, author = {Ryan, Fiona and Sivic, Josef and Heilbron, Fabian Caba and Hoffman, Judy and Rehg, James M. and Russell, Bryan}, title = {Improving Personalized Search with Regularized Low-Rank Parameter Updates}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19748-19757} }
HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Mengtian and Chen, Jinshu and Feng, Wanquan and Li, Bingchuan and Dai, Fei and Zhao, Songtao and He, Qian}, title = {HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13114-13123} }
EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yizheng and Ehm, Viktoria and Roetzer, Paul and El Amrani, Nafie and Gao, Maolin and Bernard, Florian and Cremers, Daniel}, title = {EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11665-11675} }
Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Jie and Lin, Chenyu and Qiu, Yu and Wang, Yaonan and Zhang, Hui and Wang, Ziyang and Dai, Dong}, title = {Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15653-15662} }
SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Yi and Zhao, Zhipeng and Su, Shaoshu and Golluri, Sharath and Zheng, Haoze and Yao, Runmao and Wang, Chen}, title = {SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16953-16964} }
Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Chenggong and Lyu, Fan and Tan, Jiayao and Hu, Fuyuan and Yao, Rui and Zhou, Tao}, title = {Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15319-15328} }
Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Changhao}, title = {Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11779-11788} }
Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jian and Wang, He and Xu, Yang and Wu, Zebin and Wei, Zhihui}, title = {Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17862-17871} }
StickMotion: Generating 3D Human Motions by Drawing a Stickman-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Tao and Wu, Zhihua and He, Qiaozhi and Chu, Jiaming and Qian, Ling and Cheng, Yu and Xing, Junliang and Zhao, Jian and Jin, Lei}, title = {StickMotion: Generating 3D Human Motions by Drawing a Stickman}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12370-12379} }
Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Yi and Han, Weizhen and Wu, Libing and Liu, Bingyi and Wang, Enshu and Zhang, Zhuangzhuang}, title = {Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17229-17238} }
Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Du and Wu, Tianhe and Ma, Kede and Zhang, Lei}, title = {Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12742-12752} }
Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction-
[pdf]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Ning and Zhang, Libao}, title = {Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12690-12699} }
Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zhuo and Xiang, Xiang and Liang, Yifan}, title = {Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15402-15412} }
RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Chan Hee and Blukis, Valts and Tremblay, Jonathan and Tyree, Stephen and Su, Yu and Birchfield, Stan}, title = {RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15768-15780} }
BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Minye and Dai, Haizhao and Yao, Kaixin and Tuytelaars, Tinne and Yu, Jingyi}, title = {BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16197-16207} }
TKG-DM: Training-free Chroma Key Content Generation Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Morita_2025_CVPR, author = {Morita, Ryugo and Frolov, Stanislav and Moser, Brian Bernhard and Shirakawa, Takahiro and Watanabe, Ko and Dengel, Andreas and Zhou, Jinjia}, title = {TKG-DM: Training-free Chroma Key Content Generation Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13031-13040} }
Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Yueru and Liu, Jiaming and Chen, Sixiang and Gu, Chenyang and Wang, Zhilve and Luo, Longzan and Li, Xiaoqi and Wang, Pengwei and Wang, Zhongyuan and Zhang, Renrui and Zhang, Shanghang}, title = {Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17347-17358} }
Multi-View Pose-Agnostic Change Localization with Zero Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galappaththige_2025_CVPR, author = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Sunderhauf, Niko and Miller, Dimity}, title = {Multi-View Pose-Agnostic Change Localization with Zero Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11600-11610} }
Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zhiyuan and Li, Keyi and Jia, Yifan and Ye, Le and Ma, Yufei}, title = {Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18011-18020} }
A Simple yet Effective Layout Token in Large Language Models for Document Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zhaoqing and Luo, Chuwei and Shao, Zirui and Gao, Feiyu and Xing, Hangdi and Zheng, Qi and Zhang, Ji}, title = {A Simple yet Effective Layout Token in Large Language Models for Document Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14472-14482} }
Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Jingfeng and Yang, Bin and Wang, Xinggang}, title = {Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15703-15712} }
MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuhan and Hong, Fangzhou and Yang, Shuai and Jiang, Liming and Wu, Wayne and Loy, Chen Change}, title = {MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11297-11306} }
Free Lunch Enhancements for Multi-modal Crowd Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Haoliang and Hong, Xiaopeng and Lai, Zhengqin and Shang, Miao}, title = {Free Lunch Enhancements for Multi-modal Crowd Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14013-14023} }
EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Sheng and Huang, Jiaxin and Bai, Dongfeng and Yan, Xu and Zhou, Hongyu and Wang, Yue and Liu, Bingbing and Geiger, Andreas and Liao, Yiyi}, title = {EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11286-11296} }
PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Shuangfan and Zhou, Chu and Lyu, Youwei and Guo, Heng and Ma, Zhanyu and Shi, Boxin and Sato, Imari}, title = {PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16081-16090} }
MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Hanwen and Xu, Zexiang and Xie, Desai and Chen, Ziwen and Jin, Haian and Luan, Fujun and Shu, Zhixin and Zhang, Kai and Bi, Sai and Sun, Xin and Gu, Jiuxiang and Huang, Qixing and Pavlakos, Georgios and Tan, Hao}, title = {MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16441-16452} }
HandOS: 3D Hand Reconstruction in One Stage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xingyu and Song, Zhuheng and Jiang, Xiaoke and Hu, Yaoqing and Yu, Junzhi and Zhang, Lei}, title = {HandOS: 3D Hand Reconstruction in One Stage}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17304-17314} }
All-Day Multi-Camera Multi-Target Tracking-
[pdf]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Huijie and Qiao, Yu and Zhen, Yihao and Zhao, Tinghui and Fan, Baojie and Wang, Qiang}, title = {All-Day Multi-Camera Multi-Target Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16892-16901} }
EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianrong and Fan, Hehe and Yang, Yi}, title = {EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17592-17602} }
StarVector: Generating Scalable Vector Graphics Code from Images and Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodriguez_2025_CVPR, author = {Rodriguez, Juan A. and Puri, Abhay and Agarwal, Shubham and Laradji, Issam H. and Rodriguez, Pau and Rajeswar, Sai and Vazquez, David and Pal, Christopher and Pedersoli, Marco}, title = {StarVector: Generating Scalable Vector Graphics Code from Images and Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16175-16186} }
Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics-
[pdf]
[supp]
[bibtex]@InProceedings{Kazimi_2025_CVPR, author = {Kazimi, Tahira and Allada, Ritika and Yanardag, Pinar}, title = {Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14799-14809} }
Attention Distillation: A Unified Approach to Visual Characteristics Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yang and Gao, Xu and Chen, Zichong and Huang, Hui}, title = {Attention Distillation: A Unified Approach to Visual Characteristics Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18270-18280} }
From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Jingxuan and Tan, Cheng and Chen, Qi and Wu, Gaowei and Li, Siyuan and Gao, Zhangyang and Sun, Linzhuang and Yu, Bihui and Guo, Ruifeng}, title = {From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13315-13325} }
DreamRelation: Bridging Customization and Relation Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR, author = {Shi, Qingyu and Qi, Lu and Wu, Jianzong and Bai, Jinbin and Wang, Jingbo and Tong, Yunhai and Li, Xiangtai}, title = {DreamRelation: Bridging Customization and Relation Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15723-15732} }
Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Li and Zhu, Hao and Chen, Longlong and Hu, Fei and Ye, Long and Ma, Zhan}, title = {Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11217-11226} }
TinyFusion: Diffusion Transformers Learned Shallow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Gongfan and Li, Kunjun and Ma, Xinyin and Wang, Xinchao}, title = {TinyFusion: Diffusion Transformers Learned Shallow}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18144-18154} }
SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Hanxiao and Gao, Yupeng and Xie, Jin and Yang, Jian and Wang, Beibei}, title = {SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16143-16152} }
Scaling Mesh Generation via Compressive Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2025_CVPR, author = {Weng, Haohan and Zhao, Zibo and Lei, Biwen and Yang, Xianghui and Liu, Jian and Lai, Zeqiang and Chen, Zhuo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Zhang, Tong and Gao, Shenghua and Chen, C. L. Philip}, title = {Scaling Mesh Generation via Compressive Tokenization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11093-11103} }
Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging-
[pdf]
[supp]
[bibtex]@InProceedings{Kahl_2025_CVPR, author = {Kahl, Max and Stricker, Sebastian and Hutschenreiter, Lisa and Bernard, Florian and Rother, Carsten and Savchynskyy, Bogdan}, title = {Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11569-11578} }
PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Weicheng and Jia, Guoli and Zhang, Zhongqi and Lin, Liang and Yang, Jufeng}, title = {PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18302-18312} }
Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Shuyang and Hooi, Bryan and Liu, Jun and Chang, Kai-Wei and Huang, Zi and Cai, Yujun}, title = {Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19890-19899} }
Few-shot Implicit Function Generation via Equivariance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Suizhi and Yang, Xingyi and Lu, Hongtao and Wang, Xinchao}, title = {Few-shot Implicit Function Generation via Equivariance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16262-16272} }
vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wittmann_2025_CVPR, author = {Wittmann, Bastian and Wattenberg, Yannick and Amiranashvili, Tamaz and Shit, Suprosanna and Menze, Bjoern}, title = {vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20874-20884} }
Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2025_CVPR, author = {Jain, Anubhav and Kobayashi, Yuya and Shibuya, Takashi and Takida, Yuhta and Memon, Nasir and Togelius, Julian and Mitsufuji, Yuki}, title = {Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12871-12879} }
Magma: A Foundation Model for Multimodal AI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jianwei and Tan, Reuben and Wu, Qianhui and Zheng, Ruijie and Peng, Baolin and Liang, Yongyuan and Gu, Yu and Cai, Mu and Ye, Seonghyeon and Jang, Joel and Deng, Yuquan and Gao, Jianfeng}, title = {Magma: A Foundation Model for Multimodal AI Agents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14203-14214} }
Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zelin and Wang, Chenwei and Huang, Zhaoke and Ma, Yiming and Zhao, Cunming and Zhao, Zhongying and Yan, Hong}, title = {Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16091-16100} }
Matrix3D: Large Photogrammetry Model All-in-One-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yuanxun and Zhang, Jingyang and Fang, Tian and Nahmias, Jean-Daniel and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao and Li, Shiwei}, title = {Matrix3D: Large Photogrammetry Model All-in-One}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11250-11263} }
3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Yihang and Zhou, Shangchen and Lan, Yushi and Pan, Xingang and Loy, Chen Change}, title = {3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16430-16440} }
Investigating the Role of Weight Decay in Enhancing Nonconvex SGD-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Tao and Huang, Yuhao and Shen, Li and Xu, Kele and Wang, Bao}, title = {Investigating the Role of Weight Decay in Enhancing Nonconvex SGD}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15287-15296} }
MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morin_2025_CVPR, author = {Morin, Lucas and Weber, Valery and Nassar, Ahmed and Meijer, Gerhard Ingmar and Van Gool, Luc and Li, Yawei and Staar, Peter}, title = {MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14505-14515} }
Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiahao and Zhang, Zikai and Hu, Rui}, title = {Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20654-20664} }
BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hui and Gao, Tingwei and Shao, Jie and Wu, Zuxuan}, title = {BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12891-12900} }
Mamba-Adaptor: State Space Model Adaptor for Visual Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Fei and Nie, Jiahao and Tang, Yujin and Zhang, Wenkang and Zhao, Hongshen}, title = {Mamba-Adaptor: State Space Model Adaptor for Visual Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20124-20134} }
Robust Message Embedding via Attention Flow-Based Steganography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Huayuan and Zhang, Shenzhuo and Jiang, Shiqi and Liao, Jing and Gu, Shuhang and Zheng, Dejun and Wang, Changbo and Li, Chenhui}, title = {Robust Message Embedding via Attention Flow-Based Steganography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12840-12849} }
Compositional Targeted Multi-Label Universal Perturbations-
[pdf]
[supp]
[bibtex]@InProceedings{Mahmood_2025_CVPR, author = {Mahmood, Hassan and Elhamifar, Ehsan}, title = {Compositional Targeted Multi-Label Universal Perturbations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20580-20591} }
PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies-
[pdf]
[supp]
[bibtex]@InProceedings{Nafez_2025_CVPR, author = {Nafez, Mojtaba and Koochakian, Amirhossein and Maleki, Arad and Habibi, Jafar and Rohban, Mohammad Hossein}, title = {PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20383-20394} }
Neural Video Compression with Context Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Chuanbo and Li, Zhuoyuan and Bian, Yifan and Li, Li and Liu, Dong}, title = {Neural Video Compression with Context Modulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12553-12563} }
On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hagenaars_2025_CVPR, author = {Hagenaars, Jesse J. and Wu, Yilun and Paredes-Valles, Federico and Stroobants, Stein and de Croon, Guido C. H. E.}, title = {On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17114-17123} }
Learning with Noisy Triplet Correspondence for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shuxian and He, Changhao and Liu, Xiting and Zhou, Joey Tianyi and Peng, Xi and Hu, Peng}, title = {Learning with Noisy Triplet Correspondence for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19628-19637} }
Parallelized Autoregressive Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuqing and Ren, Shuhuai and Lin, Zhijie and Han, Yujin and Guo, Haoyuan and Yang, Zhenheng and Zou, Difan and Feng, Jiashi and Liu, Xihui}, title = {Parallelized Autoregressive Visual Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12955-12965} }
CGMatch: A Different Perspective of Semi-supervised Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Bo and Lu, Jueqing and Tian, Yuan and Zhao, Haifeng and Chang, Yi and Du, Lan}, title = {CGMatch: A Different Perspective of Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15381-15391} }
FIction: 4D Future Interaction Prediction from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashutosh_2025_CVPR, author = {Ashutosh, Kumar and Pavlakos, Georgios and Grauman, Kristen}, title = {FIction: 4D Future Interaction Prediction from Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17613-17625} }
D^2iT: Dynamic Diffusion Transformer for Accurate Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Weinan and Huang, Mengqi and Chen, Nan and Zhang, Lei and Mao, Zhendong}, title = {D{\textasciicircum}2iT: Dynamic Diffusion Transformer for Accurate Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12860-12870} }
AniDoc: Animation Creation Made Easier-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Yihao and Ouyang, Hao and Wang, Hanlin and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Liu, Zhiheng and Shen, Yujun and Qu, Huamin}, title = {AniDoc: Animation Creation Made Easier}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18187-18197} }
LiSu: A Dataset and Method for LiDAR Surface Normal Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Malic_2025_CVPR, author = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst}, title = {LiSu: A Dataset and Method for LiDAR Surface Normal Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17039-17049} }
Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuanlin and Zhang, Yiyang and Xiong, Ruiqin and Zhao, Jing and Zhang, Jian and Fan, Xiaopeng and Huang, Tiejun}, title = {Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11416-11426} }
VideoGLaMM: A Large Multimodal Model for Pixel-Level Visual Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Munasinghe_2025_CVPR, author = {Munasinghe, Shehan and Gani, Hanan and Zhu, Wenqi and Cao, Jiale and Xing, Eric and Khan, Fahad Shahbaz and Khan, Salman}, title = {VideoGLaMM: A Large Multimodal Model for Pixel-Level Visual Grounding in Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19036-19046} }
ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Iwase_2025_CVPR, author = {Iwase, Shun and Irshad, Muhammad Zubair and Liu, Katherine and Guizilini, Vitor and Lee, Robert and Ikeda, Takuya and Amma, Ayako and Nishiwaki, Koichi and Kitani, Kris and Ambrus, Rares and Zakharov, Sergey}, title = {ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17405-17415} }
PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jingyi and Wang, Le and Zhou, Sanping and Wang, Sen and Li, Jiayi and Sun, Haowen and Tang, Wei}, title = {PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15757-15767} }
Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR, author = {Shin, Suhyun and Yoon, Seungwoo and Maeda, Ryota and Baek, Seung-Hwan}, title = {Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16589-16598} }
MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Ozsoy_2025_CVPR, author = {{\"O}zsoy, Ege and Pellegrini, Chantal and Czempiel, Tobias and Tristram, Felix and Yuan, Kun and Bani-Harouni, David and Eck, Ulrich and Busam, Benjamin and Keicher, Matthias and Navab, Nassir}, title = {MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19378-19389} }
Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Rui and Zhang, Jianfeng and Liang, Yixun and Luo, Guan and Li, Weiyu and Liu, Jiarui and Li, Xiu and Long, Xiaoxiao and Feng, Jiashi and Tan, Ping}, title = {Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16251-16261} }
Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chong and Chen, Tao and Gan, Zhongxue}, title = {Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14712-14722} }
Reconstructing Animals and the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulits_2025_CVPR, author = {Kulits, Peter and Black, Michael J. and Zuffi, Silvia}, title = {Reconstructing Animals and the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16565-16577} }
DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR, author = {Liao, Bencheng and Chen, Shaoyu and Yin, Haoran and Jiang, Bo and Wang, Cheng and Yan, Sixu and Zhang, Xinbang and Li, Xiangyu and Zhang, Ying and Zhang, Qian and Wang, Xinggang}, title = {DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12037-12047} }
DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Caoshuo and Li, Tanzhe and Hu, Xiaobin and Luo, Donghao and Jin, Taisong}, title = {DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20158-20168} }
Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Boran and Huang, Dingbang and Zhang, Zichen and Zhou, Jiahong and Deng, Jianbin and Gong, Jingyu and Chen, Yulong and Ma, Lizhuang and Li, Yong-Lu}, title = {Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17426-17436} }
GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Jieming and Liu, Tengyu and Meng, Ziyu and Yu, Jiale and Song, Ran and Zhang, Wei and Zhu, Yixin and Huang, Siyuan}, title = {GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15781-15790} }
GauSTAR: Gaussian Surface Tracking and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Chengwei and Xue, Lixin and Zarate, Juan and Song, Jie}, title = {GauSTAR: Gaussian Surface Tracking and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16543-16553} }
Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zixuan and Peng, Duo and Chen, Feng and Yang, Yuwei and Lei, Yinjie}, title = {Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13135-13145} }
TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baek_2025_CVPR, author = {Baek, Seungmin and Lee, Soyul and Jo, Hayeon and Choi, Hyesong and Min, Dongbo}, title = {TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14858-14868} }
CamPoint: Boosting Point Cloud Segmentation with Virtual Camera-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jianhui and Luo, Yizhi and Zhang, Zicheng and Nie, Xuecheng and Li, Bonan}, title = {CamPoint: Boosting Point Cloud Segmentation with Virtual Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11822-11832} }
MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganguly_2025_CVPR, author = {Ganguly, Aniruddha and Chatterjee, Debolina and Huang, Wentao and Zhang, Jie and Yurovsky, Alisa and Johnson, Travis Steele and Chen, Chao}, title = {MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15611-15620} }
SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yongting and Chen, Lu and Zheng, Guodong and Gao, Yifeng and Zheng, Rui and Fu, Jinlan and Yin, Zhenfei and Jin, Senjie and Qiao, Yu and Huang, Xuanjing and Zhao, Feng and Gui, Tao and Shao, Jing}, title = {SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19867-19878} }
PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Xiang and Yang, Shuai and Liu, Jiaying}, title = {PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18240-18249} }
Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Vuong_2025_CVPR, author = {Vuong, Tung-Long and Phan, Hoang and Vo, Vy and Bui, Anh and Do, Thanh-Toan and Le, Trung and Phung, Dinh}, title = {Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19974-19984} }
Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Haipeng and Tang, Sheng and Cao, Juan and Zhang, Enshuo and Tang, Fan and Lee, Tong-Yee}, title = {Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18083-18092} }
FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Sen and Wang, Le and Zhou, Sanping and Tian, Jingyi and Li, Jiayi and Sun, Haowen and Tang, Wei}, title = {FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12176-12186} }
Visual Lexicon: Rich Image Features in Language Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, XuDong and Zhou, Xingyi and Fathi, Alireza and Darrell, Trevor and Schmid, Cordelia}, title = {Visual Lexicon: Rich Image Features in Language Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19736-19747} }
Test-Time Visual In-Context Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Jiahao and Tonioni, Alessio and Rauschmayr, Nathalie and Tombari, Federico and Schiele, Bernt}, title = {Test-Time Visual In-Context Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19996-20005} }
Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Hao and Zeng, Yiming and Bi, Zetong and Wan, Zhaoliang and Huang, Junlong and Cheng, Hui}, title = {Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12100-12110} }
SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Kaiyu and Liu, Ruixun and Cao, Xiangyong and Bai, Xueru and Zhou, Feng and Meng, Deyu and Wang, Zhi}, title = {SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10545-10556} }
Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yanbo and Guan, Jiyang and Liang, Jian and He, Ran}, title = {Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19879-19889} }
Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Long and Wang, Jiakai and Hao, Haojie and Qin, Haotong and Zhao, Jiejie and Liu, Xianglong}, title = {Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13414-13423} }
Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kendong and Zhu, Zhiyu and Liu, Hui and Hou, Junhui}, title = {Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18031-18040} }
Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhongwen and Boykov, Yuri}, title = {Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20244-20253} }
MVSAnywhere: Zero-Shot Multi-View Stereo-
[pdf]
[arXiv]
[bibtex]@InProceedings{Izquierdo_2025_CVPR, author = {Izquierdo, Sergio and Sayed, Mohamed and Firman, Michael and Garcia-Hernando, Guillermo and Turmukhambetov, Daniyar and Civera, Javier and Mac Aodha, Oisin and Brostow, Gabriel and Watson, Jamie}, title = {MVSAnywhere: Zero-Shot Multi-View Stereo}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11493-11504} }
Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Yoshida_2025_CVPR, author = {Yoshida, Tomoya and Kurita, Shuhei and Nishimura, Taichi and Mori, Shinsuke}, title = {Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17370-17382} }
BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lozano_2025_CVPR, author = {Lozano, Alejandro and Sun, Min Woo and Burgess, James and Chen, Liangyu and Nirschl, Jeffrey J. and Gu, Jeffrey and Lopez, Ivan and Aklilu, Josiah and Rau, Anita and Katzer, Austin Wolfgang and Zhang, Yuhui and Chiu, Collin and Wang, Xiaohan and Song, Alfred Seunghoon and Tibshirani, Robert and Yeung-Levy, Serena}, title = {BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19724-19735} }
Structure-Aware Correspondence Learning for Relative Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yihan and Yang, Wenfei and Ren, Huan and Zhang, Shifeng and Zhang, Tianzhu and Wu, Feng}, title = {Structure-Aware Correspondence Learning for Relative Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11611-11621} }
PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stekovic_2025_CVPR, author = {Stekovic, Sinisa and Artykov, Arslan and Ainetter, Stefan and D'Urso, Mattia and Fraundorfer, Friedrich}, title = {PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16283-16292} }
FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Daosong and Cui, Mingyue and Huang, Kai}, title = {FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18760-18769} }
Shape Abstraction via Marching Differentiable Support Functions-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Sunkyung and Lee, Jeongmin and Lee, Dongjun}, title = {Shape Abstraction via Marching Differentiable Support Functions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16902-16911} }
Scaling Down Text Encoders of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lifu and Liu, Daqing and Liu, Xinchen and He, Xiaodong}, title = {Scaling Down Text Encoders of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18424-18433} }
POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin}, title = {POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15055-15064} }
SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Junsheng and Cao, Nieqing and Ding, Yan and Xie, Mengying and Gu, Fuqiang and Chen, Chao}, title = {SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19414-19423} }
Gaussian Eigen Models for Human Heads-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zielonka_2025_CVPR, author = {Zielonka, Wojciech and Bolkart, Timo and Beeler, Thabo and Thies, Justus}, title = {Gaussian Eigen Models for Human Heads}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15930-15940} }
4D-Fly: Fast 4D Reconstruction from a Single Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Diankun and Liu, Fangfu and Hung, Yi-Hsin and Qian, Yue and Zhan, Xiaohang and Duan, Yueqi}, title = {4D-Fly: Fast 4D Reconstruction from a Single Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16663-16673} }
Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuchen and Wang, Hongyuan and Wang, Lizhi and Wang, Xin and Zhu, Lin and Lu, Wanxuan and Huang, Hua}, title = {Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12679-12689} }
Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duggal_2025_CVPR, author = {Duggal, Shivam and Hu, Yushi and Michel, Oscar and Kembhavi, Aniruddha and Freeman, William T. and Smith, Noah A. and Krishna, Ranjay and Torralba, Antonio and Farhadi, Ali and Ma, Wei-Chiu}, title = {Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13326-13336} }
DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yongshu and Liu, Chen and Zhu, Minghang and Ao, Sheng and Wen, Chenglu and Wang, Cheng}, title = {DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17050-17059} }
Style-Editor: Text-driven Object-centric Style Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jihun and Gim, Jongmin and Lee, Kyoungmin and Lee, Seunghun and Im, Sunghoon}, title = {Style-Editor: Text-driven Object-centric Style Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18281-18291} }
Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Tai-Yu and Jeon, Sooyoung and Fan, Mengdi and Yoo, Jinsu and Feng, Zhenyang and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun}, title = {Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12027-12036} }
FastVLM: Efficient Vision Encoding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2025_CVPR, author = {Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Li, Chun-Liang and Koc, Cem and True, Nate and Antony, Albert and Santhanam, Gokula and Gabriel, James and Grasch, Peter and Tuzel, Oncel and Pouransari, Hadi}, title = {FastVLM: Efficient Vision Encoding for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19769-19780} }
VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Yufan and Guo, Pengfei and Tang, Yucheng and Myronenko, Andriy and Nath, Vishwesh and Xu, Ziyue and Yang, Dong and Zhao, Can and Simon, Benjamin and Belue, Mason and Harmon, Stephanie and Turkbey, Baris and Xu, Daguang and Li, Wenqi}, title = {VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20863-20873} }
S2D-LFE: Sparse-to-Dense Light Field Event Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yutong and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei}, title = {S2D-LFE: Sparse-to-Dense Light Field Event Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11207-11216} }
The Art of Deception: Color Visual Illusions and Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gomez-Villa_2025_CVPR, author = {Gomez-Villa, Alexandra and Wang, Kai and Parraga, C. Alejandro and Twardowski, Bart{\l}omiej and Malo, Jesus and Vazquez-Corral, Javier and van den Weijer, Joost}, title = {The Art of Deception: Color Visual Illusions and Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18642-18652} }
Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Zhiyuan and Liang, Xinyue and Wu, Rongyuan and Zhu, Xiangyu and Lei, Zhen and Zhang, Lei}, title = {Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11036-11050} }
Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Yancheng and Yin, Fei and Hammou, Dounia and Mantiuk, Rafal}, title = {Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20039-20048} }
Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Fei and Bors, Adrian G.}, title = {Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20512-20522} }
Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Cheng and Li, Ao and Yao, Hu and Zhu, Ce and Zhang, Le}, title = {Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14954-14964} }
SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Xuan and Xiang, Jijun and Wang, Xianqi and Liu, Longliang and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin}, title = {SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16619-16628} }
Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Eunji and Kim, Siwon and Park, Minjun and Entezari, Rahim and Yoon, Sungroh}, title = {Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13361-13370} }
Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barda_2025_CVPR, author = {Barda, Amir and Gadelha, Matheus and Kim, Vladimir G. and Aigerman, Noam and Bermano, Amit H. and Groueix, Thibault}, title = {Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16273-16282} }
STDD: Spatio-Temporal Dual Diffusion for Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Shuaizhen and Zhang, Xiaoya and Liu, Xin and Liu, Mengyi and Cui, Zhen}, title = {STDD: Spatio-Temporal Dual Diffusion for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12575-12584} }
Implicit Correspondence Learning for Image-to-Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xinjun and Yang, Wenfei and Deng, Jiacheng and Cheng, Zhixin and Zhou, Xu and Zhang, Tianzhu}, title = {Implicit Correspondence Learning for Image-to-Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16922-16931} }
ILIAS: Instance-Level Image retrieval At Scale-
[pdf]
[supp]
[bibtex]@InProceedings{Kordopatis-Zilos_2025_CVPR, author = {Kordopatis-Zilos, Giorgos and Stojni{\'c}, Vladan and Manko, Anna and Suma, Pavel and Ypsilantis, Nikolaos-Antonios and Efthymiadis, Nikos and Laskar, Zakaria and Matas, Jiri and Chum, Ondrej and Tolias, Giorgos}, title = {ILIAS: Instance-Level Image retrieval At Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14777-14787} }
GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Haifeng and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11525-11535} }
SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Junchen and Cao, Si-Yuan and Zhang, Runmin and Zhang, Chenghao and Yu, Zhu and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang}, title = {SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16685-16694} }
USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kang and Zhang, Jiyuan and Hao, Zecheng and Zheng, Yajing and Huang, Tiejun and Yu, Zhaofei}, title = {USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16609-16618} }
Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Huaxin and Xu, Xiaohao and Wang, Xiang and Zuo, Jialong and Huang, Xiaonan and Gao, Changxin and Zhang, Shanjun and Yu, Li and Sang, Nong}, title = {Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13843-13853} }
QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Xuan and Ma, Weize and Liu, Jing and Yang, Changdi and Ding, Rui and Wang, Quanyi and Ding, Henghui and Niu, Wei and Wang, Yanzhi and Zhao, Pu and Lin, Jun and Gu, Jiuxiang}, title = {QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11448-11460} }
ReWind: Understanding Long Videos with Instructed Learnable Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Diko_2025_CVPR, author = {Diko, Anxhelo and Wang, Tinghuai and Swaileh, Wassim and Sun, Shiyan and Patras, Ioannis}, title = {ReWind: Understanding Long Videos with Instructed Learnable Memory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13734-13743} }
DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2025_CVPR, author = {Ju, Xiaoliang and Li, Hongsheng}, title = {DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16229-16239} }
Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zhiyang and Xiang, Jinxu and Ma, Kai and Zhou, Wengang and Li, Houqiang and Zhang, Ran}, title = {Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10783-10792} }
Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiangtao and Li, Sheng and Li, Ao and Liu, Yipeng and Zhang, Fan and Zhu, Ce and Zhang, Le}, title = {Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20632-20642} }
NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Jia, Emily Yue-ting and Zhou, Junsheng and Ma, Baorui and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11317-11327} }
Towards Training-free Anomaly Detection with Vision and Language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinjin and Wang, Guodong and Jin, Yizhou and Huang, Di}, title = {Towards Training-free Anomaly Detection with Vision and Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15204-15213} }
Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration-
[pdf]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Lianxin and Zheng, Bingbing and Wu, Si and Wong, Hau San}, title = {Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17821-17830} }
Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jiapeng and Yang, Ceyuan and Zheng, Kecheng and Xu, Yinghao and Shi, Zifan and Zhang, Yifei and Chen, Qifeng and Shen, Yujun}, title = {Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18411-18423} }
Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmed_2025_CVPR, author = {Ahmed, Soikat Hasan and Finkbeiner, Jan and Neftci, Emre}, title = {Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13970-13979} }
HUNet: Homotopy Unfolding Network for Image Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Feiyang and Gan, Hongping}, title = {HUNet: Homotopy Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12799-12808} }
See Further When Clear: Curriculum Consistency Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yunpeng and Liu, Boxiao and Zhang, Yi and Hou, Xingzhong and Song, Guanglu and Liu, Yu and You, Haihang}, title = {See Further When Clear: Curriculum Consistency Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18103-18112} }
PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Libo and Li, Jianze and Qin, Haotong and Li, Wenbo and Zhang, Yulun and Guo, Yong and Yang, Xiaokang}, title = {PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12778-12788} }
RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Qiyu and Ni, Xingyu and Shen, Qianfan and Chen, Wenzheng and Chen, Baoquan and Chu, Mengyu}, title = {RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16153-16162} }
Three-view Focal Length Recovery From Homographies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Yaqing and Kocur, Viktor and Haladova, Zuzana Berger and Wu, Qianliang and Cai, Shen and Yang, Jian and Kukelova, Zuzana}, title = {Three-view Focal Length Recovery From Homographies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11505-11514} }
RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Haoran and Han, Jiaming and Li, Changsheng and Li, Yu-Feng and Yue, Xiangyu}, title = {RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14538-14548} }
Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Chanyoung and Ju, Dayun and Han, Woojung and Yang, Ming-Hsuan and Hwang, Seong Jae}, title = {Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15033-15042} }
Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Linyi and Tucker, Richard and Li, Zhengqi and Fouhey, David and Snavely, Noah and Holynski, Aleksander}, title = {Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10497-10509} }
FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kefan and Min, Chaerin and Zhang, Linguang and Hampali, Shreyas and Keskin, Cem and Sridhar, Srinath}, title = {FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17448-17460} }
InterDyn: Controllable Interactive Dynamics with Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akkerman_2025_CVPR, author = {Akkerman, Rick and Feng, Haiwen and Black, Michael J. and Tzionas, Dimitrios and Abrevaya, Victoria Fern{\'a}ndez}, title = {InterDyn: Controllable Interactive Dynamics with Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12467-12479} }
LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Shenghao and Yang, Qize and Mo, Qijie and Yan, Junkai and Wei, Xihan and Meng, Jingke and Xie, Xiaohua and Zheng, Wei-Shi}, title = {LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14987-14997} }
MagicQuill: An Intelligent Interactive Image Editing System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Wang, Wen and Liu, Zhiheng and Chen, Qifeng and Shen, Yujun}, title = {MagicQuill: An Intelligent Interactive Image Editing System}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13072-13082} }
Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyangguang and Delitzas, Alexandros and Wang, Fangjinhua and Zhang, Ruida and Ji, Xiangyang and Pollefeys, Marc and Engelmann, Francis}, title = {Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19401-19413} }
Boosting Adversarial Transferability through Augmentation in Hypothesis Space-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yu and Liu, Weiquan and Xu, Qingshan and Zheng, Shijun and Huang, Shujun and Zang, Yu and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {Boosting Adversarial Transferability through Augmentation in Hypothesis Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19175-19185} }
ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Djeghim_2025_CVPR, author = {Djeghim, Hala and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Sidib{\'e}, D{\'e}sir{\'e}}, title = {ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11854-11863} }
Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xuanbai and Xu, Xiang and Li, Zhihua and Zhao, Tianchen and Perona, Pietro and Zhang, Qin and Xing, Yifan}, title = {Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14281-14292} }
UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mittal_2025_CVPR, author = {Mittal, Himangi and Zhuang, Peiye and Lee, Hsin-Ying and Tulsiani, Shubham}, title = {UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16208-16218} }
STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Tianqing and Yu, Kairong and Zhong, Xian and Wang, Hongwei and Xu, Qi and Zhang, Qiang}, title = {STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13959-13969} }
Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Zijian and Jia, Wangwang and Zhang, Xingxing and Zhou, Dulan and Xu, Kele and Dawei, Feng and Dou, Yong and Mao, Xinjun and Wang, Huaimin}, title = {Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20523-20533} }
Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Reddy_2025_CVPR, author = {Reddy, Arun and Martin, Alexander and Yang, Eugene and Yates, Andrew and Sanders, Kate and Murray, Kenton and Kriz, Reno and de Melo, Celso M. and Van Durme, Benjamin and Chellappa, Rama}, title = {Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19691-19701} }
Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Huajie and Li, Zhengxian and Yu, Xiaohan and Hu, Yongli and Yin, Baocai and Yang, Jian and Qi, Yuankai}, title = {Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20275-20285} }
VidMuse: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Zeyue and Liu, Zhaoyang and Yuan, Ruibin and Pan, Jiahao and Liu, Qifeng and Tan, Xu and Chen, Qifeng and Xue, Wei and Guo, Yike}, title = {VidMuse: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18782-18793} }
Human-centered Interactive Learning via MLLMs for Text-to-Image Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Yang and Chen, Chao and Fu, Zhihang and Peng, Dezhong and Peng, Xi and Hu, Peng}, title = {Human-centered Interactive Learning via MLLMs for Text-to-Image Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14390-14399} }
Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Donggoo and Kim, Daehyun and Wang, Guanghui and Kim, Tae Hyun}, title = {Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17892-17901} }
EdgeDiff: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yujun and Wang, Ruisheng and Huang, Shangfeng and Cai, Guorong}, title = {EdgeDiff: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17008-17018} }
DeNVeR: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Chun-Hung and Chen, Shih-Hong and Hu, Chih-Yao and Wu, Hsin-Yu and Chen, Kai-Hsin and Chen, Yu-You and Su, Chih-Hai and Lee, Chih-Kuo and Liu, Yu-Lun}, title = {DeNVeR: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15682-15692} }
Task-Aware Clustering for Prompting Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2025_CVPR, author = {Hao, Fusheng and He, Fengxiang and Wu, Fuxiang and Wang, Tichao and Song, Chengqun and Cheng, Jun}, title = {Task-Aware Clustering for Prompting Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14745-14755} }
FSboard: Over 3 Million Characters of ASL Fingerspelling Collected via Smartphones-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Georg_2025_CVPR, author = {Georg, Manfred and Tanzer, Garrett and Uboweja, Esha and Hassan, Saad and Shengelia, Maximus and Sepah, Sam and Forbes, Sean and Starner, Thad}, title = {FSboard: Over 3 Million Characters of ASL Fingerspelling Collected via Smartphones}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13897-13906} }
Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of 3D Volumes-
[pdf]
[supp]
[bibtex]@InProceedings{Leonard_2025_CVPR, author = {Leonard, Ludwic and Thurey, Nils and Westermann, R{\"u}diger}, title = {Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of 3D Volumes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16163-16174} }
STiL: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Siyi and Luo, Xinzhe and O'Regan, Declan P. and Qin, Chen}, title = {STiL: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15549-15559} }
Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yicheng and Li, Xiangtai and Li, Yining and Zeng, Yanhong and Wu, Jianzong and Zhao, Xiangyu and Chen, Kai}, title = {Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19952-19962} }
ReRAW: RGB-to-RAW Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berdan_2025_CVPR, author = {Berdan, Radu and Besbinar, Beril and Reinders, Christoph and Otsuka, Junji and Iso, Daisuke}, title = {ReRAW: RGB-to-RAW Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11833-11843} }
HunyuanPortrait: Implicit Condition Control for Enhanced Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zunnan and Yu, Zhentao and Zhou, Zixiang and Zhou, Jun and Jin, Xiaoyu and Hong, Fa-ting and Ji, Xiaozhong and Zhu, Junwei and Cai, Chengfei and Tang, Shiyu and Lin, Qin and Li, Xiu and Lu, Qinglin}, title = {HunyuanPortrait: Implicit Condition Control for Enhanced Portrait Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15909-15919} }
Zero-shot 3D Question Answering via Voxel-based Dynamic Token Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Hsiang-Wei and Chen, Fu-Chen and Chai, Wenhao and Su, Che-Chun and Xia, Lu and Jung, Sanghun and Yang, Cheng-Yen and Hwang, Jenq-Neng and Sun, Min and Kuo, Cheng-Hao}, title = {Zero-shot 3D Question Answering via Voxel-based Dynamic Token Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19424-19434} }
Enhanced then Progressive Fusion with View Graph for Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Zhibin and Liu, Meng and Wang, Siwei and Liang, Ke and Zhang, Yi and Liu, Suyuan and Jin, Jiaqi and Liu, Xinwang and Zhu, En}, title = {Enhanced then Progressive Fusion with View Graph for Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15518-15527} }
Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beizaee_2025_CVPR, author = {Beizaee, Farzad and Lodygensky, Gregory A. and Desrosiers, Christian and Dolz, Jose}, title = {Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19088-19097} }
Continuous 3D Perception Model with Persistent State-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qianqian and Zhang, Yifei and Holynski, Aleksander and Efros, Alexei A. and Kanazawa, Angjoo}, title = {Continuous 3D Perception Model with Persistent State}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10510-10522} }
LP-Diff: Towards Improved Restoration of Real-World Degraded License Plate-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Haoyan and Zhang, Zhenrong and Feng, Yuzheng and Nguyen, Anh and Liu, Hongbin},
  title     = {{LP-Diff}: Towards Improved Restoration of Real-World Degraded License Plate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17831--17840},
}
FilmComposer: LLM-Driven Music Production for Silent Film Clips-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Zhifeng and He, Qile and Zhu, Youjia and He, Qiwei and Li, Mengtian},
  title     = {{FilmComposer}: {LLM}-Driven Music Production for Silent Film Clips},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13519--13528},
}
EventPSR: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Bohan and Han, Jin and Shi, Boxin and Sato, Imari},
  title     = {{EventPSR}: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11427--11436},
}
CASP: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video-
[pdf]
[bibtex]@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Zhaolin and Qin, Han and Li, Zhiyang and Fan, Xiaopeng and Zuo, Wangmeng and Zhao, Debin},
  title     = {{CASP}: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12605--12614},
}
Gazing at Rewards: Eye Movements as a Lens into Human and AI Decision-Making in Hybrid Visual Foraging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bo and Tan, Dingwei and Kuo, Yen-Ling and Sun, Zhaowei and Wolfe, Jeremy M. and Cham, Tat-Jen and Zhang, Mengmi},
  title     = {Gazing at Rewards: Eye Movements as a Lens into Human and {AI} Decision-Making in Hybrid Visual Foraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14810--14823},
}
FOCUS: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhengrui and Xiong, Conghao and Ma, Jiabo and Sun, Qichen and Feng, Lishuang and Wang, Jinzhuo and Chen, Hao},
  title     = {{FOCUS}: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15590--15600},
}
GRAE-3DMOT: Geometry Relation-Aware Encoder for Online 3D Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hyunseop and Lee, Hyo-Jun and Lee, Yonguk and Lee, Jinu and Kim, Hanul and Koh, Yeong Jun},
  title     = {{GRAE-3DMOT}: Geometry Relation-Aware Encoder for Online {3D} Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11697--11706},
}
Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Xiaoyi and Aponte, David and Banbury, Colby and Robinson, Daniel P. and Ding, Tianyu and Koishida, Kazuhito and Zharkov, Ilya and Chen, Tianyi},
  title     = {Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15234--15244},
}
MAR-3D: Progressive Masked Auto-regressor for High-Resolution 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jinnan and Zhu, Lingting and Hu, Zeyu and Qian, Shengju and Chen, Yugang and Wang, Xin and Lee, Gim Hee},
  title     = {{MAR-3D}: Progressive Masked Auto-regressor for High-Resolution {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11083--11092},
}
Synthetic Prior for Few-Shot Drivable Head Avatar Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zielonka_2025_CVPR,
  author    = {Zielonka, Wojciech and Garbin, Stephan J. and Lattas, Alexandros and Kopanas, George and Gotardo, Paulo and Beeler, Thabo and Thies, Justus and Bolkart, Timo},
  title     = {Synthetic Prior for Few-Shot Drivable Head Avatar Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10735--10746},
}
Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Janny_2025_CVPR,
  author    = {Janny, Steeven and Poirier, Herv{\'e} and Antsfeld, Leonid and Bono, Guillaume and Monaci, Gianluca and Chidlovskii, Boris and Giuliari, Francesco and Del Bue, Alessio and Wolf, Christian},
  title     = {Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12111--12121},
}
DFormerv2: Geometry Self-Attention for RGBD Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Bo-Wen and Cao, Jiao-Long and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{DFormerv2}: Geometry Self-Attention for {RGBD} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19345--19355},
}
GroomLight: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yang and Chai, Menglei and Vicini, Delio and Zhou, Yuxiao and Xu, Yinghao and Guibas, Leonidas and Wetzstein, Gordon and Beeler, Thabo},
  title     = {{GroomLight}: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16040--16050},
}
Sea-ing in Low-light-
[pdf]
[supp]
[bibtex]@InProceedings{Varghese_2025_CVPR,
  author    = {Varghese, Nisha and Rajagopalan, A. N.},
  title     = {Sea-ing in Low-light},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16629--16640},
}
Generative Modeling of Class Probability for Multi-Modal Representation Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR,
  author    = {Shin, JungKyoo and Kim, Bumsoo and Kim, Eunwoo},
  title     = {Generative Modeling of Class Probability for Multi-Modal Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20737--20746},
}
VisionZip: Longer is Better but Not Necessary in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Senqiao and Chen, Yukang and Tian, Zhuotao and Wang, Chengyao and Li, Jingyao and Yu, Bei and Jia, Jiaya},
  title     = {{VisionZip}: Longer is Better but Not Necessary in Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19792--19802},
}
BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yunqi and Huang, Ian and Je, Jihyeon and Yang, Guandao and Guibas, Leonidas},
  title     = {{BlenderGym}: Benchmarking Foundational Model Systems for Graphics Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18574--18583},
}
VoteFlow: Enforcing Local Rigidity in Self-Supervised Scene Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yancong and Wang, Shiming and Nan, Liangliang and Kooij, Julian and Caesar, Holger},
  title     = {{VoteFlow}: Enforcing Local Rigidity in Self-Supervised Scene Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17155--17164},
}
Uncertainty Weighted Gradients for Model Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jinxu and Tao, Linwei and Dong, Minjing and Xu, Chang},
  title     = {Uncertainty Weighted Gradients for Model Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15497--15507},
}
FFaceNeRF: Few-shot Face Editing in Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Kwan and Kim, Chaelin and Shin, Hangyeul and Noh, Junyong},
  title     = {{FFaceNeRF}: Few-shot Face Editing in Neural Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10825--10835},
}
Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Fangyun and Zhao, Jinjing and Yan, Kun and Xu, Chang},
  title     = {Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19304--19314},
}
Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_CVPR,
  author    = {You, Haoran and Barnes, Connelly and Zhou, Yuqian and Kang, Yan and Du, Zhenbang and Zhou, Wei and Zhang, Lingzhi and Nitzan, Yotam and Liu, Xiaoyang and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Lin, Yingyan Celine},
  title     = {Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18072--18082},
}
Zero-shot RGB-D Point Cloud Registration with Pre-trained Large Vision Model-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin},
  title     = {Zero-shot {RGB-D} Point Cloud Registration with Pre-trained Large Vision Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16943--16952},
}
DistinctAD: Distinctive Audio Description Generation in Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Bo and Wu, Wenhao and Wu, Qiangqiang and Song, Yuxin and Chan, Antoni B.},
  title     = {{DistinctAD}: Distinctive Audio Description Generation in Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13571--13581},
}
CL-MoE: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Huai_2025_CVPR,
  author    = {Huai, Tianyu and Zhou, Jie and Wu, Xingjiao and Chen, Qin and Bai, Qingchun and Zhou, Ze and He, Liang},
  title     = {{CL-MoE}: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19608--19617},
}
Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Boqian and Yang, Shen and Chen, Hao and Yang, Chao and Jia, Jing and Jiang, Guang},
  title     = {Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16987--16996},
}
Trajectory Mamba: Efficient Attention-Mamba Forecasting Model Based on Selective SSM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yizhou and Cheng, Yihua and Wang, Kezhi},
  title     = {{Trajectory Mamba}: Efficient Attention-{Mamba} Forecasting Model Based on Selective {SSM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12058--12067},
}
VLOGGER: Multimodal Diffusion for Embodied Avatar Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Corona_2025_CVPR,
  author    = {Corona, Enric and Zanfir, Andrei and Bazavan, Eduard Gabriel and Kolotouros, Nikos and Alldieck, Thiemo and Sminchisescu, Cristian},
  title     = {{VLOGGER}: Multimodal Diffusion for Embodied Avatar Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15896--15908},
}
DEIM: DETR with Improved Matching for Fast Convergence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shihua and Lu, Zhichao and Cun, Xiaodong and Yu, Yongjun and Zhou, Xiao and Shen, Xi},
  title     = {{DEIM}: {DETR} with Improved Matching for Fast Convergence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15162--15171},
}
Human Motion Instruction Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Lei and Jia, Sen and Wang, Jianhao and Jiang, Zhongyu and Zhou, Feng and Dai, Ju and Zhang, Tianfang and Wu, Zongkai and Hwang, Jenq-Neng},
  title     = {Human Motion Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17582--17591},
}
A Flag Decomposition for Hierarchical Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mankovich_2025_CVPR,
  author    = {Mankovich, Nathan and Santamaria, Ignacio and Camps-Valls, Gustau and Birdal, Tolga},
  title     = {A Flag Decomposition for Hierarchical Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18738--18748},
}
RCP-Bench: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_CVPR,
  author    = {Du, Shihang and Qu, Sanqing and Wang, Tianhang and Zhang, Xudong and Zhu, Yunwei and Mao, Jian and Lu, Fan and Lin, Qiao and Chen, Guang},
  title     = {{RCP-Bench}: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11908--11918},
}
Olympus: A Universal Task Router for Computer Vision Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yuanze and Li, Yunsheng and Chen, Dongdong and Xu, Weijian and Clark, Ronald and Torr, Philip},
  title     = {Olympus: A Universal Task Router for Computer Vision Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14235--14246},
}
Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Smeu_2025_CVPR,
  author    = {Smeu, Stefan and Boldisor, Dragos-Alexandru and Oneata, Dan and Oneata, Elisabeta},
  title     = {Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18815--18825},
}
Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bin and Wu, Fan and Ouyang, Linke and Gu, Zhuangcheng and Zhang, Rui and Xia, Renqiu and Shi, Botian and Zhang, Bo and He, Conghui},
  title     = {Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19681--19690},
}
Improving Semi-Supervised Semantic Segmentation with Sliced-Wasserstein Feature Alignment and Uniformity-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Chen-Yi and Derakhshandeh, Kasra and Chaterji, Somali},
  title     = {Improving Semi-Supervised Semantic Segmentation with Sliced-{Wasserstein} Feature Alignment and Uniformity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20233--20243},
}
3D-Mem: 3D Scene Memory for Embodied Exploration and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuncong and Yang, Han and Zhou, Jiachen and Chen, Peihao and Zhang, Hongxin and Du, Yilun and Gan, Chuang},
  title     = {{3D-Mem}: {3D} Scene Memory for Embodied Exploration and Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17294--17303},
}
Navigation World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bar_2025_CVPR,
  author    = {Bar, Amir and Zhou, Gaoyue and Tran, Danny and Darrell, Trevor and LeCun, Yann},
  title     = {Navigation World Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15791--15801},
}
Conformal Prediction and MLLM aided Uncertainty Quantification in Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nag_2025_CVPR,
  author    = {Nag, Sayak and Ghosh, Udita and Ta, Calvin-Khang and Bose, Sarosij and Li, Jiachen and Roy-Chowdhury, Amit K.},
  title     = {Conformal Prediction and {MLLM} aided Uncertainty Quantification in Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11676--11686},
}
Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Lu, Dongyue and Chen, Jinnan and Wang, Yangang and Lee, Gim Hee},
  title     = {Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17475--17485},
}
Scenario Dreamer: Vectorized Latent Diffusion for Generating Driving Simulation Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rowe_2025_CVPR,
  author    = {Rowe, Luke and Girgis, Roger and Gosselin, Anthony and Paull, Liam and Pal, Christopher and Heide, Felix},
  title     = {{Scenario Dreamer}: Vectorized Latent Diffusion for Generating Driving Simulation Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17207--17218},
}
Poly-Autoregressive Prediction for Modeling Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Thakkar_2025_CVPR,
  author    = {Thakkar, Neerja and Sadjadpour, Tara and Rajasegeran, Jathushan and Ginosar, Shiry and Malik, Jitendra},
  title     = {Poly-Autoregressive Prediction for Modeling Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12402--12412},
}
PoseBH: Prototypical Multi-Dataset Training Beyond Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Uyoung and Freer, Jonathan and Baek, Seungryul and Chang, Hyung Jin and Kim, Kwang In},
  title     = {{PoseBH}: Prototypical Multi-Dataset Training Beyond Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12278--12288},
}
Decision SpikeFormer: Spike-Driven Transformer for Decision Making-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wei and Gu, Qinying and Ye, Nanyang},
  title     = {{Decision SpikeFormer}: Spike-Driven Transformer for Decision Making},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19241--19250},
}
Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Quanjiang and Luo, Tingjin and Liao, Jiahui},
  title     = {Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20706--20715},
}
EMOE: Modality-Specific Enhanced Dynamic Emotion Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yiyang and Huang, Wenke and Wan, Guancheng and Su, Kehua and Ye, Mang},
  title     = {{EMOE}: Modality-Specific Enhanced Dynamic Emotion Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14314--14324},
}
Generative Video Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoteng and Wang, Tianyu and Wang, Jui-Hsien and Liu, Qing and Zhang, Zhifei and Lee, Joon-Young and Li, Yijun and Yu, Bei and Lin, Zhe and Kim, Soo Ye and Jia, Jiaya},
  title     = {Generative Video Propagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17712--17722},
}
From Multimodal LLMs to Generalist Embodied Agents: Methods and Lessons-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szot_2025_CVPR,
  author    = {Szot, Andrew and Mazoure, Bogdan and Attia, Omar and Timofeev, Aleksei and Agrawal, Harsh and Hjelm, Devon and Gan, Zhe and Kira, Zsolt and Toshev, Alexander},
  title     = {From Multimodal {LLMs} to Generalist Embodied Agents: Methods and Lessons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10644--10655},
}
Mosaic3D: Foundation Dataset and Model for Open-Vocabulary 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Junha and Park, Chunghyun and Choe, Jaesung and Wang, Yu-Chiang Frank and Kautz, Jan and Cho, Minsu and Choy, Chris},
  title     = {{Mosaic3D}: Foundation Dataset and Model for Open-Vocabulary {3D} Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14089--14101},
}
T-CIL: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hwang_2025_CVPR,
  author    = {Hwang, Seong-Hyeon and Kim, Minsu and Whang, Steven Euijong},
  title     = {{T-CIL}: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15339--15348},
}
LoRA Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xuan and Chang, Xiaobin},
  title     = {{LoRA} Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15308--15318},
}
AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_CVPR,
  author    = {Lyu, Jin and Zhu, Tianyi and Gu, Yi and Lin, Li and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying and An, Liang},
  title     = {{AniMer}: Animal Pose and Shape Estimation Using Family Aware Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17486--17496},
}
Co-op: Correspondence-based Novel Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2025_CVPR,
  author    = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook},
  title     = {Co-op: Correspondence-based Novel Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11622--11632},
}
CATANet: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {{CATANet}: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17902--17912},
}
RayFlow: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Huiyang and Xia, Xin and Yang, Yuhong and Ren, Yuxi and Wang, Xing and Xiao, Xuefeng},
  title     = {{RayFlow}: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18113--18123},
}
Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Aocheng and Zimmer-Dauphinee, James R. and Kalyanam, Rajesh and Lindsay, Ian and VanValkenburgh, Parker and Wernke, Steven and Aliaga, Daniel},
  title     = {Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11759--11768},
}
Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Peng and Bie, Yequan and Mao, Jianda and Song, Yangqiu and Wang, Yang and Chen, Hao and Chen, Kani},
  title     = {Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14679--14689},
}
Rate-In: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zeevi_2025_CVPR,
  author    = {Zeevi, Tal and Shwartz-Ziv, Ravid and LeCun, Yann and Staib, Lawrence H. and Onofrey, John A.},
  title     = {{Rate-In}: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20757--20766},
}
Thin-Shell-SfT: Fine-Grained Monocular Non-rigid 3D Surface Tracking with Neural Deformation Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Kairanda_2025_CVPR,
  author    = {Kairanda, Navami and Habermann, Marc and Naik, Shanthika and Theobalt, Christian and Golyanik, Vladislav},
  title     = {{Thin-Shell-SfT}: Fine-Grained Monocular Non-rigid {3D} Surface Tracking with Neural Deformation Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11373--11383},
}
DeCLIP: Decoupled Learning for Open-Vocabulary Dense Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junjie and Chen, Bin and Li, Yulin and Kang, Bin and Chen, Yichi and Tian, Zhuotao},
  title     = {{DeCLIP}: Decoupled Learning for Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14824--14834},
}
SocialGesture: Delving into Multi-person Gesture Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Xu and Virupaksha, Pranav and Jia, Wenqi and Lai, Bolin and Ryan, Fiona and Lee, Sangmin and Rehg, James M.},
  title     = {{SocialGesture}: Delving into Multi-person Gesture Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19509--19519},
}
Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Hang and Chi, Changxi and Wan, Peng and Zhang, Daoqiang and Shao, Wei},
  title     = {Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20810--20819},
}
Question-Aware Gaussian Experts for Audio-Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hongyeob and Jung, Inyoung and Suh, Dayoon and Zhang, Youjia and Lee, Sangmin and Hong, Sungeun},
  title     = {Question-Aware {Gaussian} Experts for Audio-Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13681--13690},
}
Adaptive Rectangular Convolution for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xueyang and Zheng, Zhixin and Shao, Jiandong and Duan, Yule and Deng, Liang-Jian},
  title     = {Adaptive Rectangular Convolution for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17872--17881},
}
UIBDiffusion: Universal Imperceptible Backdoor Attack for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR,
  author    = {Han, Yuning and Zhao, Bingyin and Chu, Rui and Luo, Feng and Sikdar, Biplab and Lao, Yingjie},
  title     = {{UIBDiffusion}: Universal Imperceptible Backdoor Attack for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19186--19196},
}
FisherTune: Fisher-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Dong and Li, Jinlong and Wang, Shuang and Wu, Mengyao and Zang, Qi and Sebe, Nicu and Zhong, Zhun},
  title     = {{FisherTune}: {Fisher}-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15043--15054},
}
AdMiT: Adaptive Multi-Source Tuning in Dynamic Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Xiangyu and Niloy, Fahim Faisal and Ahmed, Sk Miraj and Krishnamurthy, Srikanth V. and Guler, Basak and Swami, Ananthram and Oymak, Samet and Roy-Chowdhury, Amit},
  title     = {{AdMiT}: Adaptive Multi-Source Tuning in Dynamic Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20569--20579},
}
Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yanjun and Li, Zhaoyang and Chen, Honghui and Xu, Lizhi}, title = {Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19047-19056} }
Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Bolin and Juefei-Xu, Felix and Liu, Miao and Dai, Xiaoliang and Mehta, Nikhil and Zhu, Chenguang and Huang, Zeyi and Rehg, James M. and Lee, Sangmin and Zhang, Ning and Xiao, Tong}, title = {Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18346-18357} }
Revisiting Generative Replay for Class Incremental Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shizhou and Lv, Xueqiang and Xing, Yinghui and Wu, Qirui and Xu, Di and Zhang, Yanning}, title = {Revisiting Generative Replay for Class Incremental Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20340-20349} }
Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Qiyang and Chen, Hansheng and Tomizuka, Masayoshi and Keutzer, Kurt and Wang, Qianqian and Xu, Chenfeng}, title = {Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11579-11589} }
Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Woojung and Lee, Yeonkyung and Kim, Chanyoung and Park, Kwanghyun and Hwang, Seong Jae}, title = {Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18401-18410} }
MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zifan and Chen, Ziqing and Chen, Junyu and Wang, Jilong and Yang, Yuxin and Liu, Yunze and Liu, Xueyi and Wang, He and Yi, Li}, title = {MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17315-17325} }
PERSE: Personalized 3D Generative Avatars from A Single Portrait-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2025_CVPR, author = {Cha, Hyunsoo and Lee, Inhee and Joo, Hanbyul}, title = {PERSE: Personalized 3D Generative Avatars from A Single Portrait}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15953-15962} }
Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bohao and Wang, Xuejiao and Wang, Changbo and He, Gaoqi}, title = {Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10701-10711} }
ACL: Activating Capability of Linear Attention for Image Restoration-
[pdf]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Yubin and Meng, Yuan and Ji, Jiayi and Sun, Xiaoshuai}, title = {ACL: Activating Capability of Linear Attention for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17913-17923} }
MARBLE: Material Recomposition and Blending in CLIP-Space-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Ta Ying and Sharma, Prafull and Boss, Mark and Jampani, Varun}, title = {MARBLE: Material Recomposition and Blending in CLIP-Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13061-13071} }
Efficient Visual State Space Model for Image Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Lingshun and Dong, Jiangxin and Tang, Jinhui and Yang, Ming-Hsuan and Pan, Jinshan}, title = {Efficient Visual State Space Model for Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12710-12719} }
Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuillecard_2025_CVPR, author = {Vuillecard, Pierre and Odobez, Jean-Marc}, title = {Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13508-13518} }
Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Zhiwei and Nan, Yuesong and Zhao, Huixi and Liu, Gengdai}, title = {Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12912-12922} }
Detecting Out-of-Distribution Through the Lens of Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Litian and Qin, Yao}, title = {Detecting Out-of-Distribution Through the Lens of Neural Collapse}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15424-15433} }
Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Jiang and Li, Rui and Zhu, Yu and Guo, Rong and Sun, Jinqiu and Zhang, Yanning}, title = {Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11307-11316} }
VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kushwaha_2025_CVPR, author = {Kushwaha, Saksham Singh and Tian, Yapeng}, title = {VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13529-13539} }
Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression-
[pdf]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Zhenqi and Liu, Ting and Zhang, Yanning}, title = {Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11156-11166} }
CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR, author = {Yao, Ziyu and Cheng, Xuxin and Huang, Zhiqi and Li, Lei}, title = {CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19143-19153} }
SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zixuan and Boss, Mark and Vasishta, Aaryaman and Rehg, James M. and Jampani, Varun}, title = {SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16860-16870} }
Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Xiaoying and Saha, Avinab and He, Junfeng and Hao, Susan and Vicol, Paul and Ryu, Moonkyung and Li, Gang and Singla, Sahil and Young, Sarah and Li, Yinxiao and Yang, Feng and Ramachandran, Deepak}, title = {Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18486-18496} }
Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Yucong and Gu, Shilin and Fan, Ruidong and Xu, Chao and Hou, Chenping}, title = {Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15392-15401} }
A Physics-Informed Blur Learning Framework for Imaging Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liqun and Li, Yuxuan and Dai, Jun and Gu, Jinwei and Xue, Tianfan}, title = {A Physics-Informed Blur Learning Framework for Imaging Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10913-10922} }
Towards Practical Real-Time Neural Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR, author = {Jia, Zhaoyang and Li, Bin and Li, Jiahao and Xie, Wenxuan and Qi, Linfeng and Li, Houqiang and Lu, Yan}, title = {Towards Practical Real-Time Neural Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12543-12552} }
DepthSplat: Connecting Gaussian Splatting and Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Haofei and Peng, Songyou and Wang, Fangjinhua and Blum, Hermann and Barath, Daniel and Geiger, Andreas and Pollefeys, Marc}, title = {DepthSplat: Connecting Gaussian Splatting and Depth}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16453-16463} }
Dynamic Camera Poses and Where to Find Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rockwell_2025_CVPR, author = {Rockwell, Chris and Tung, Joseph and Lin, Tsung-Yi and Liu, Ming-Yu and Fouhey, David F. and Lin, Chen-Hsuan}, title = {Dynamic Camera Poses and Where to Find Them}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12444-12455} }
OmniGen: Unified Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Shitao and Wang, Yueze and Zhou, Junjie and Yuan, Huaying and Xing, Xingrun and Yan, Ruiran and Li, Chaofan and Wang, Shuting and Huang, Tiejun and Liu, Zheng}, title = {OmniGen: Unified Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13294-13304} }
QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meli_2025_CVPR, author = {Meli, Natacha Kuete and Golyanik, Vladislav and Benkner, Marcel Seelbach and Moeller, Michael}, title = {QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11395-11405} }
Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Kaiwei and Zhu, Dandan and Min, Xiongkuo and Zhai, Guangtao}, title = {Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16219-16228} }
SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Leigang and Li, Haochuan and Wang, Wenjie and Liu, Xiang and Li, Juncheng and Nie, Liqiang and Chua, Tat-Seng}, title = {SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18497-18508} }
Calibrated Multi-Preference Optimization for Aligning Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Kyungmin and Li, Xiahong and Wang, Qifei and He, Junfeng and Ke, Junjie and Yang, Ming-Hsuan and Essa, Irfan and Shin, Jinwoo and Yang, Feng and Li, Yinxiao}, title = {Calibrated Multi-Preference Optimization for Aligning Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18465-18475} }
Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Nicole and Manicke, Caleb and Sahu, Ronak and Ding, Caiwen and Lao, Yingjie}, title = {Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16388-16397} }
PolarNeXt: Rethink Instance Segmentation with Polar Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Jiacheng and Zhou, Xinghong and Wu, Yiqiang and Zhu, Bin and Lu, Jiaxuan and Qin, Yu and Li, Xiaomao}, title = {PolarNeXt: Rethink Instance Segmentation with Polar Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19315-19324} }
SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chongkai and Liu, Ting and Li, Anqi and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin}, title = {SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19356-19365} }
DarkIR: Robust Low-Light Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feijoo_2025_CVPR, author = {Feijoo, Daniel and Benito, Juan C. and Garcia, Alvaro and Conde, Marcos V.}, title = {DarkIR: Robust Low-Light Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10879-10889} }
R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Ziyi and Li, Hanxuan and Fu, Bin and Xiong, Chuyan and Wang, Ruiping and Chen, Xilin}, title = {R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19456-19466} }
From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jinhong and Wu, Cheng-En and Li, Huanran and Zhang, Jifan and Hu, Yu Hen and Morgado, Pedro}, title = {From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20028-20038} }
Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Hyunsoo and Kim, Donghyun and Kim, Suhyun}, title = {Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18250-18259} }
MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Jun and Liu, Ting and Wu, Yihang and Qu, Xiaochao and Liu, Luoqi and Hu, Xiaolin}, title = {MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18325-18334} }
Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, He and Kong, Quyu and Xu, Kechun and Xia, Xunlong and Deng, Bing and Ye, Jieping and Xiong, Rong and Wang, Yue}, title = {Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17337-17346} }
Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Pu and Zhou, Feng and Yang, Lu and Huang, Tianrui and Song, Qing}, title = {Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18358-18368} }
Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, You and Zhang, Zhipeng and Li, Xinyang and Qu, Yansong and Lin, Yu and Zhang, Shengchuan and Cao, Liujuan}, title = {Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16346-16355} }
NoiseCtrl: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Longquan and Wang, He and Tang, Jinhui}, title = {NoiseCtrl: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18093-18102} }
KMD: Koopman Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Tianyi and Jiang, Haochuan and Huang, Kaizhu}, title = {KMD: Koopman Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15663-15671} }
DORNet: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhengxue and Yan, Zhiqiang and Pan, Jinshan and Gao, Guangwei and Zhang, Kai and Yang, Jian}, title = {DORNet: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15813-15822} }
Fractal Calibration for Long-tailed Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alexandridis_2025_CVPR, author = {Alexandridis, Konstantinos Panagiotis and Elezi, Ismail and Deng, Jiankang and Nguyen, Anh and Luo, Shan}, title = {Fractal Calibration for Long-tailed Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15139-15150} }
M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Qingzheng and Cao, Ru and Shen, Xin and Du, Heming and Wang, Sen and Yu, Xin}, title = {M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12289-12300} }
FedSPA: Generalizable Federated Graph Learning under Homophily Heterogeneity-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Zhang, Guibin and Yang, Carl and Ye, Mang}, title = {FedSPA: Generalizable Federated Graph Learning under Homophily Heterogeneity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15464-15475} }
GazeGene: Large-scale Synthetic Gaze Dataset with 3D Eyeball Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2025_CVPR, author = {Bao, Yiwei and Wang, Zhiming and Lu, Feng}, title = {GazeGene: Large-scale Synthetic Gaze Dataset with 3D Eyeball Annotations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18749-18759} }
VideoHandles: Editing 3D Object Compositions in Videos Using Video Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koo_2025_CVPR, author = {Koo, Juil and Guerrero, Paul and Huang, Chun-Hao P. and Ceylan, Duygu and Sung, Minhyuk}, title = {VideoHandles: Editing 3D Object Compositions in Videos Using Video Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17692-17701} }
GaussTR: Foundation Model-Aligned Gaussian Transformer for Self-Supervised 3D Spatial Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Haoyi and Liu, Liu and Cheng, Tianheng and Wang, Xinjie and Lin, Tianwei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang}, title = {GaussTR: Foundation Model-Aligned Gaussian Transformer for Self-Supervised 3D Spatial Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11960-11970} }
Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baumann_2025_CVPR, author = {Baumann, Stefan Andreas and Krause, Felix and Neumayr, Michael and Stracke, Nick and Sevi, Melvin and Hu, Vincent Tao and Ommer, Bj{\"o}rn}, title = {Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13231-13241} }
SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Renz_2025_CVPR, author = {Renz, Katrin and Chen, Long and Arani, Elahe and Sinavski, Oleg}, title = {SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11993-12003} }
Improved Video VAE for Latent Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Pingyu and Zhu, Kai and Liu, Yu and Zhao, Liming and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {Improved Video VAE for Latent Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18124-18133} }
Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Mingjun and Sun, Long and Dong, Jiangxin and Pan, Jinshan}, title = {Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11328-11337} }
Learned Image Compression with Dictionary-based Entropy Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Jingbo and Zhang, Leheng and Zhou, Xingyu and Li, Mu and Li, Wen and Gu, Shuhang}, title = {Learned Image Compression with Dictionary-based Entropy Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12850-12859} }
FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jun and Li, Jiahao and Xu, Zunnan and Li, Hanhui and Cheng, Yiji and Hong, Fa-Ting and Lin, Qin and Lu, Qinglin and Liang, Xiaodan}, title = {FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13093-13103} }
DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_CVPR, author = {Yi, Zhilv and Lu, Xiao and Ding, Hong and Hu, Jingbo and Jiang, Zhi and Xiao, Chunxia}, title = {DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16061-16070} }
MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Mengqiu and Chen, Kaixin and Guo, Heng and Huang, Yixiang and Wu, Ming and Shi, Zhenwei and Zhang, Chuang and Guo, Jun}, title = {MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12637-12646} }
The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{George_2025_CVPR, author = {George, Naveen and Dasaraju, Karthik Nandan and Chittepu, Rutheesh Reddy and Mopuri, Konda Reddy}, title = {The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13393-13402} }
Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Junning and Lu, Minglei and Wang, Fei and Guo, Yu and Ling, Yonggen}, title = {Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16839-16849} }
AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kenghong and Zhang, Baoquan and Yu, Demin and Feng, Wenzhi and Chen, Shidong and Gao, Feifan and Li, Xutao and Ye, Yunming}, title = {AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17841-17850} }
Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Houzhang and Wang, Xiaolin and Li, Zengyang and Wang, Lu and Li, Qingshan and Chang, Yi and Yan, Luxin}, title = {Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11898-11907} }
Articulated Kinematics Distillation from Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xuan and Ma, Qianli and Lin, Tsung-Yi and Chen, Yongxin and Jiang, Chenfanfu and Liu, Ming-Yu and Xiang, Donglai}, title = {Articulated Kinematics Distillation from Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17571-17581} }
ExpertAF: Expert Actionable Feedback from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashutosh_2025_CVPR, author = {Ashutosh, Kumar and Nagarajan, Tushar and Pavlakos, Georgios and Kitani, Kris and Grauman, Kristen}, title = {ExpertAF: Expert Actionable Feedback from Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13582-13594} }
Volumetrically Consistent 3D Gaussian Rasterization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Talegaonkar_2025_CVPR, author = {Talegaonkar, Chinmay and Belhe, Yash and Ramamoorthi, Ravi and Antipa, Nicholas}, title = {Volumetrically Consistent 3D Gaussian Rasterization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10953-10963} }
The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Nordstrom_2025_CVPR, author = {Nordstr{\"o}m, Marcus and Maki, Atsuto and Hult, Henrik}, title = {The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20820-20829} }
LLaVA-Critic: Learning to Evaluate Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xiong_2025_CVPR, author = {Xiong, Tianyi and Wang, Xiyao and Guo, Dong and Ye, Qinghao and Fan, Haoqi and Gu, Quanquan and Huang, Heng and Li, Chunyuan}, title = {LLaVA-Critic: Learning to Evaluate Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13618-13628} }
VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Nath_2025_CVPR, author = {Nath, Vishwesh and Li, Wenqi and Yang, Dong and Myronenko, Andriy and Zheng, Mingxin and Lu, Yao and Liu, Zhijian and Yin, Hongxu and Law, Yee Man and Tang, Yucheng and Guo, Pengfei and Zhao, Can and Xu, Ziyue and He, Yufan and Harmon, Stephanie and Simon, Benjamin and Heinrich, Greg and Aylward, Stephen and Edgar, Marc and Zephyr, Michael and Molchanov, Pavlo and Turkbey, Baris and Roth, Holger and Xu, Daguang}, title = {VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14788-14798} }
Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jingxi and Feng, Brandon Y. and Cai, Haoming and Wang, Tianfu and Burner, Levi and Yuan, Dehao and Fermuller, Cornelia and Metzler, Christopher A. and Aloimonos, Yiannis}, title = {Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12456-12466} }
Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jiyuan and Liu, Xinwang and Li, Chuankun and Wan, Xinhang and Tan, Hao and Zhang, Yi and Liang, Weixuan and Qu, Qian and Feng, Yu and Guan, Renxiang and Liang, Ke}, title = {Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20727-20736} }
Q-Eval-100K: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zicheng and Kou, Tengchuan and Wang, Shushi and Li, Chunyi and Sun, Wei and Wang, Wei and Li, Xiaoyu and Wang, Zongyu and Cao, Xuezhi and Min, Xiongkuo and Liu, Xiaohong and Zhai, Guangtao}, title = {Q-Eval-100K: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10621-10631} }
Dual Focus-Attention Transformer for Robust Point Cloud Registration-
[pdf]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Kexue and Yuan, Mingzhi and Wang, Changwei and Pang, Weiguang and Chi, Jing and Wang, Manning and Gao, Longxiang}, title = {Dual Focus-Attention Transformer for Robust Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11769-11778} }
Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Tianhao and Chen, Han and Hu, Juncheng and Zhu, Yungang and Li, Ximing}, title = {Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20592-20601} }
Progress-Aware Video Frame Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR, author = {Xue, Zihui and An, Joungbin and Yang, Xitong and Grauman, Kristen}, title = {Progress-Aware Video Frame Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13639-13650} }
SMTPD: A New Benchmark for Temporal Prediction of Social Media Popularity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yijie and Zheng, Bolun and Zhu, Wei and Pan, Hangjia and Yao, Yuchen and Xu, Ning and Liu, Anan and Zhang, Quan and Yan, Chenggang}, title = {SMTPD: A New Benchmark for Temporal Prediction of Social Media Popularity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18847-18857} }
Learning on Model Weights using Tree Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Horwitz_2025_CVPR, author = {Horwitz, Eliahu and Cavia, Bar and Kahana, Jonathan and Hoshen, Yedid}, title = {Learning on Model Weights using Tree Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20468-20478} }
Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bharadwaj_2025_CVPR, author = {Bharadwaj, Shashwath and Kitichotkul, Ruangrawee and Agarwal, Akshay and Goyal, Vivek K}, title = {Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11406-11415} }
Towards Transformer-Based Aligned Generation with Self-Coherence Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shulei and Lin, Wang and Huang, Hai and Wang, Hanting and Cai, Sihang and Han, WenKang and Jin, Tao and Chen, Jingyuan and Sun, Jiacheng and Zhu, Jieming and Zhao, Zhou}, title = {Towards Transformer-Based Aligned Generation with Self-Coherence Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18455-18464} }
Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maracani_2025_CVPR, author = {Maracani, Andrea and Ozkan, Savas and Cho, Sijun and Kim, Hyowon and Noh, Eunchung and Min, Jeongwon and Min, Cho Jung and Park, Dookun and Ozay, Mete}, title = {Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14516-14526} }
DART: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Sang-Jun and Heo, Keun-Soo and Shin, Dong-Hee and Son, Young-Han and Oh, Ji-Hye and Kam, Tae-Eui}, title = {DART: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15580-15589} }
On the Consistency of Video Large Language Models in Temporal Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Minjoon and Xiao, Junbin and Zhang, Byoung-Tak and Yao, Angela}, title = {On the Consistency of Video Large Language Models in Temporal Comprehension}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13713-13722} }
Less is More: Efficient Model Merging with Binary Task Switch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2025_CVPR, author = {Qi, Biqing and Li, Fangyuan and Wang, Zhen and Gao, Junqi and Li, Dong and Ye, Peng and Zhou, Bowen}, title = {Less is More: Efficient Model Merging with Binary Task Switch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15265-15274} }
One-Minute Video Generation with Test-Time Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dalal_2025_CVPR, author = {Dalal, Karan and Koceja, Daniel and Xu, Jiarui and Zhao, Yue and Han, Shihao and Cheung, Ka Chun and Kautz, Jan and Choi, Yejin and Sun, Yu and Wang, Xiaolong}, title = {One-Minute Video Generation with Test-Time Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17702-17711} }
InteractionMap: Improving Online Vectorized HDMap Construction with Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Kuang and Yang, Chuan and Li, Zhanbin}, title = {InteractionMap: Improving Online Vectorized HDMap Construction with Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17176-17186} }
ROCKET-1: Mastering Open-World Interaction with Visual-Temporal Context Prompting-
[pdf]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Shaofei and Wang, Zihao and Lian, Kewei and Mu, Zhancun and Ma, Xiaojian and Liu, Anji and Liang, Yitao}, title = {ROCKET-1: Mastering Open-World Interaction with Visual-Temporal Context Prompting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12122-12131} }
RLAIF-V: Open-Source AI Feedback Leads to Super GPT-4V Trustworthiness-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Tianyu and Zhang, Haoye and Li, Qiming and Xu, Qixin and Yao, Yuan and Chen, Da and Lu, Xiaoman and Cui, Ganqu and Dang, Yunkai and He, Taiwen and Feng, Xiaocheng and Song, Jun and Zheng, Bo and Liu, Zhiyuan and Chua, Tat-Seng and Sun, Maosong}, title = {RLAIF-V: Open-Source AI Feedback Leads to Super GPT-4V Trustworthiness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19985-19995} }
EditSplat: Multi-View Fusion and Attention-Guided Optimization for View-Consistent 3D Scene Editing with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Dong In and Park, Hyeongcheol and Seo, Jiyoung and Park, Eunbyung and Park, Hyunje and Baek, Ha Dam and Shin, Sangheon and Kim, Sangmin and Kim, Sangpil}, title = {EditSplat: Multi-View Fusion and Attention-Guided Optimization for View-Consistent 3D Scene Editing with 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11135-11145} }
One-shot 3D Object Canonicalization based on Geometric and Semantic Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Li and Wang, Yujie and Chen, Wenzheng and Dai, Qiyu and Gao, Qingzhe and Qin, Xueying and Chen, Baoquan}, title = {One-shot 3D Object Canonicalization based on Geometric and Semantic Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16850-16859} }
Inst3D-LMM: Instance-Aware 3D Scene Understanding with Multi-modal Instruction Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hanxun and Li, Wentong and Wang, Song and Chen, Junbo and Zhu, Jianke}, title = {Inst3D-LMM: Instance-Aware 3D Scene Understanding with Multi-modal Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14147-14157} }
ProHOC: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wallin_2025_CVPR, author = {Wallin, Erik and Kahl, Fredrik and Hammarstrand, Lars}, title = {ProHOC: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20612-20621} }
CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu}, title = {CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15172-15182} }
Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ruiheng and Chen, Haozhe and Zhao, Boyao and Chen, Kejiang and Zhang, Weiming}, title = {Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20169-20178} }
Interleaved-Modal Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Jun and Li, Yongqi and Cao, Ziqiang and Li, Wenjie}, title = {Interleaved-Modal Chain-of-Thought}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19520-19529} }
Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shixin and He, Chaoxiang and Ma, Xiaojing and Zhu, Bin Benjamin and Wang, Shuo and Hu, Hongsheng and Zhang, Dongmei and Yu, Linchen}, title = {Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20685-20694} }
O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sharifdeen_2025_CVPR, author = {Sharifdeen, Ashshak and Munir, Muhammad Akhtar and Baliah, Sanoojan and Khan, Salman and Khan, Muhammad Haris}, title = {O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19942-19951} }
Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhuoran and Yang, Linlin and Sun, Pengzhan and Hui, Pan and Yao, Angela}, title = {Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12255-12265} }
Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Shijie and Ren, Hui and Weng, Yijia and Zhang, Shuwang and Wang, Zhen and Xu, Dejia and Fan, Zhiwen and You, Suya and Wang, Zhangyang and Guibas, Leonidas and Kadambi, Achuta}, title = {Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14179-14190} }
Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Jin-Liang and Huang, Ting-Zhu and Deng, Liang-Jian and Lin, Guang and Cao, Zihan and Li, Chao and Zhao, Qibin}, title = {Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12669-12678} }
EASEMVC:Efficient Dual Selection Mechanism for Deep Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Baili and Dong, Zhibin and Liang, Ke and Liu, Suyuan and Wang, Siwei and Liu, Tianrui and Hu, Xingchen and Zhu, En and Liu, Xinwang}, title = {EASEMVC:Efficient Dual Selection Mechanism for Deep Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20716-20726} }
DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Jingzhou and Liu, Yang and Chen, Weixing and Li, Zhen and Wang, Yaowei and Li, Guanbin and Lin, Liang}, title = {DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14169-14178} }
IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jingyi and Tu, Siwei and Yang, Weidong and Fei, Ben and Li, Shuhao and Liu, Keyi and Luo, Yeqi and Ma, Lipeng and Bai, Lei}, title = {IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10567-10576} }
DTOS: Dynamic Time Object Sensing with Large Multimodal Model-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jirui and Zhang, Jinrong and Liu, Shenglan and Xu, Luhao and Huang, Zhixiong and Huang, Gao}, title = {DTOS: Dynamic Time Object Sensing with Large Multimodal Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13810-13820} }
How to Merge Your Multimodal Models Over Time?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dziadzio_2025_CVPR, author = {Dziadzio, Sebastian and Udandarao, Vishaal and Roth, Karsten and Prabhu, Ameya and Akata, Zeynep and Albanie, Samuel and Bethge, Matthias}, title = {How to Merge Your Multimodal Models Over Time?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20479-20491} }
Identifying and Mitigating Position Bias of Multi-image Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and Zhang, Jing}, title = {Identifying and Mitigating Position Bias of Multi-image Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10599-10609} }
Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zhiwei and Meng, Yucong and Fu, Kexue and Tang, Feilong and Wang, Shuo and Song, Zhijian}, title = {Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20223-20232} }
ShowUI: One Vision-Language-Action Model for GUI Visual Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kevin Qinghong and Li, Linjie and Gao, Difei and Yang, Zhengyuan and Wu, Shiwei and Bai, Zechen and Lei, Stan Weixian and Wang, Lijuan and Shou, Mike Zheng}, title = {ShowUI: One Vision-Language-Action Model for GUI Visual Agent}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19498-19508} }
Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Jian and Liu, Jinlai and Jiang, Yi and Yan, Bin and Zhang, Yuqi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing}, title = {Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15733-15744} }
HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Boyuan and Wang, Xiaofeng and Ni, Chaojun and Zhao, Guosheng and Yang, Zhiqin and Zhu, Zheng and Zhang, Muyang and Zhou, Yukun and Chen, Xinze and Huang, Guan and Liu, Lihong and Wang, Xingang}, title = {HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12391-12401} }
ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hannan_2025_CVPR, author = {Hannan, Tanveer and Islam, Md Mohaiminul and Gu, Jindong and Seidl, Thomas and Bertasius, Gedas}, title = {ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19012-19022} }
ArtiFade: Learning to Generate High-quality Subject from Blemished Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Shuya and Hao, Shaozhe and Cao, Yukang and Wong, Kwan-Yee K.}, title = {ArtiFade: Learning to Generate High-quality Subject from Blemished Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13167-13177} }
Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Haotong and Peng, Sida and Chen, Jingxiao and Peng, Songyou and Sun, Jiaming and Liu, Minghuan and Bao, Hujun and Feng, Jiashi and Zhou, Xiaowei and Kang, Bingyi}, title = {Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17070-17080} }
GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Enguang and Peng, Zhimao and Xie, Zhengyuan and Yang, Fei and Liu, Xialei and Cheng, Ming-Ming}, title = {GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20296-20306} }
Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Xingguo and Dong, Xingbo and Wang, Liwen and Yang, Jiewen and Zhao, Lei and Pu, Bin and Jin, Zhe and Li, Xuejun}, title = {Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15621-15631} }
DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_CVPR, author = {Hou, Sizai and Li, Songze and Yao, Duanyi}, title = {DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20675-20684} }
Towards Scalable Human-aligned Benchmark for Text-guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ryu_2025_CVPR, author = {Ryu, Suho and Kim, Kihyun and Baek, Eugene and Shin, Dongsoo and Lee, Joonseok}, title = {Towards Scalable Human-aligned Benchmark for Text-guided Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18292-18301} }
Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Zichen and Chen, Wei and Qiu, Qiang}, title = {Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20146-20157} }
Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Baade_2025_CVPR, author = {Baade, Alan and Chen, Changan}, title = {Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16753-16763} }
CryptoFace: End-to-End Encrypted Face Recognition-
[pdf]
[bibtex]@InProceedings{Ao_2025_CVPR, author = {Ao, Wei and Boddeti, Vishnu Naresh}, title = {CryptoFace: End-to-End Encrypted Face Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19197-19206} }
Relation-Rich Visual Document Generator for Visual Information Extraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Zi-Han and Lin, Chien-Wei and Li, Wei-Hua and Liu, Hsuan-Tung and Yeh, Yi-Ren and Chen, Chu-Song}, title = {Relation-Rich Visual Document Generator for Visual Information Extraction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14449-14459} }
PromptHash:Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2025_CVPR, author = {Zou, Qiang and Cheng, Shuli and Chen, Jiayi}, title = {PromptHash:Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19649-19658} }
Universal Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shengqiong and Fei, Hao and Chua, Tat-seng}, title = {Universal Scene Graph Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14158-14168} }
Split Adaptation for Pre-trained Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lixu and Shang, Bingqi and Li, Yi and Mohapatra, Payal and Dong, Wei and Wang, Xiao and Zhu, Qi}, title = {Split Adaptation for Pre-trained Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20092-20102} }
SpatialLLM: A Compound 3D-Informed Design towards Spatially-Intelligent Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Wufei and Ye, Luoxin and de Melo, Celso M and Yuille, Alan and Chen, Jieneng}, title = {SpatialLLM: A Compound 3D-Informed Design towards Spatially-Intelligent Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17249-17260} }
Learning Occlusion-Robust Vision Transformers for Real-Time UAV Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, You and Wang, Xucheng and Yang, Xiangyang and Liu, Mengyuan and Zeng, Dan and Ye, Hengzhou and Li, Shuiwang}, title = {Learning Occlusion-Robust Vision Transformers for Real-Time UAV Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17103-17113} }
Plug-and-Play Versatile Compressed Video Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Huimin and Li, Jiacheng and Xiong, Zhiwei}, title = {Plug-and-Play Versatile Compressed Video Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17767-17777} }
UltraFusion: Ultra High Dynamic Imaging using Exposure Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zixuan and Wang, Yujin and Cai, Xin and You, Zhiyuan and Lu, Zheming and Zhang, Fan and Guo, Shi and Xue, Tianfan}, title = {UltraFusion: Ultra High Dynamic Imaging using Exposure Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16111-16121} }
Noise-Resistant Video Anomaly Detection via RGB Error-Guided Multiscale Predictive Coding and Dynamic Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Han and Du, Wenli and Liao, Peng and Wang, Bing and Fan, Siyuan}, title = {Noise-Resistant Video Anomaly Detection via RGB Error-Guided Multiscale Predictive Coding and Dynamic Memory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19109-19119} }
GroupMamba: Efficient Group-Based Visual State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shaker_2025_CVPR, author = {Shaker, Abdelrahman and Wasim, Syed Talal and Khan, Salman and Gall, Juergen and Khan, Fahad Shahbaz}, title = {GroupMamba: Efficient Group-Based Visual State Space Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14912-14922} }
Escaping Plato's Cave: Towards the Alignment of 3D and Text Latent Spaces-
[pdf]
[supp]
[bibtex]@InProceedings{Hadgi_2025_CVPR, author = {Hadgi, Souhail and Moschella, Luca and Santilli, Andrea and Gomez, Diego and Huang, Qixing and Rodol\`a, Emanuele and Melzi, Simone and Ovsjanikov, Maks}, title = {Escaping Plato's Cave: Towards the Alignment of 3D and Text Latent Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19825-19835} }
ActiveGAMER: Active GAussian Mapping through Efficient Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liyan and Zhan, Huangying and Chen, Kevin and Xu, Xiangyu and Yan, Qingan and Cai, Changjiang and Xu, Yi}, title = {ActiveGAMER: Active GAussian Mapping through Efficient Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16486-16497} }
Positive2Negative: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Tong and Wang, Lizhi and Xu, Zhiyuan and Zhu, Lin and Lu, Wanxuan and Huang, Hua}, title = {Positive2Negative: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17924-17934} }
SeaLion: Semantic Part-Aware Latent Point Diffusion Models for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Dekai and Di, Yan and Gavranovic, Stefan and Ilic, Slobodan}, title = {SeaLion: Semantic Part-Aware Latent Point Diffusion Models for 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11789-11798} }
Toward Real-world BEV Perception: Depth Uncertainty Estimation via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting}, title = {Toward Real-world BEV Perception: Depth Uncertainty Estimation via Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17124-17133} }
Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shirkavand_2025_CVPR, author = {Shirkavand, Reza and Yu, Peiran and Gao, Shangqian and Somepalli, Gowthami and Goldstein, Tom and Huang, Heng}, title = {Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18619-18629} }
WildGS-SLAM: Monocular Gaussian Splatting SLAM in Dynamic Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Jianhao and Zhu, Zihan and Bieri, Valentin and Pollefeys, Marc and Peng, Songyou and Armeni, Iro}, title = {WildGS-SLAM: Monocular Gaussian Splatting SLAM in Dynamic Environments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11461-11471} }
RePerformer: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yuheng and Shen, Zhehao and Guo, Chengcheng and Hong, Yu and Su, Zhuo and Zhang, Yingliang and Habermann, Marc and Xu, Lan}, title = {RePerformer: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11349-11360} }
CheXWorld: Exploring Image World Modeling for Radiograph Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Yang and Wang, Yulin and Tao, Chenxin and Liu, Pan and Song, Shiji and Huang, Gao}, title = {CheXWorld: Exploring Image World Modeling for Radiograph Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20778-20788} }
Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Xinshuai and Chen, Weixing and Liu, Yang and Chen, Weikai and Li, Guanbin and Lin, Liang}, title = {Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12078-12088} }
AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jiarui and Duan, Huiyu and Zhai, Guangtao and Wang, Juntong and Min, Xiongkuo}, title = {AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18869-18880} }
Autoregressive Distillation of Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Yeongmin and Anagnostidis, Sotiris and Du, Yuming and Sch{\"o}nfeld, Edgar and Kohler, Jonas and Georgopoulos, Markos and Pumarola, Albert and Thabet, Ali and Sanakoyeu, Artsiom}, title = {Autoregressive Distillation of Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15745-15756} }
OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Mingjie and Zhang, Jiyao and Wu, Tianshu and Zhao, Yinghao and Gao, Wenlong and Dong, Hao}, title = {OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17359-17369} }
DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Leqi and Gong, Guoqiang and Hao, Tianxiang and He, Tao and Zhang, Yifeng and Liu, Pengzhang and Zhao, Sicheng and Han, Jungong and Ding, Guiguang}, title = {DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19702-19712} }
Visual-Instructed Degradation Diffusion for All-in-One Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Wenyang and Qin, Haina and Chen, Zewen and Wang, Libin and Zheng, Dandan and Li, Yuming and Liu, Yufan and Li, Bing and Hu, Weiming}, title = {Visual-Instructed Degradation Diffusion for All-in-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12764-12777} }
Insightful Instance Features for 3D Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Roh_2025_CVPR, author = {Roh, Wonseok and Jung, Hwanhee and Nam, Giljoo and Lee, Dong In and Park, Hyeongcheol and Yoon, Sang Ho and Joo, Jungseock and Kim, Sangpil}, title = {Insightful Instance Features for 3D Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14057-14067} }
EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cong_2025_CVPR, author = {Cong, Gaoxiang and Pan, Jiadong and Li, Liang and Qi, Yuankai and Peng, Yuxin and van den Hengel, Anton and Yang, Jian and Huang, Qingming}, title = {EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15863-15873} }
A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zheming and Liu, He and Lang, Congyan and Wang, Tao and Li, Yidong and Kampffmeyer, Michael C.}, title = {A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15528-15537} }
Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR, author = {Lv, Qi and Li, Hao and Deng, Xiang and Shao, Rui and Li, Yinchuan and Hao, Jianye and Gao, Longxiang and Wang, Michael Yu and Nie, Liqiang}, title = {Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17394-17404} }
ZeroVO: Visual Odometry with Minimal Assumptions-
[pdf]
[bibtex]@InProceedings{Lai_2025_CVPR, author = {Lai, Lei and Yin, Zekai and Ohn-Bar, Eshed}, title = {ZeroVO: Visual Odometry with Minimal Assumptions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17092-17102} }
VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR, author = {Yuan, Yuqian and Zhang, Hang and Li, Wentong and Cheng, Zesen and Zhang, Boqiang and Li, Long and Li, Xin and Zhao, Deli and Zhang, Wenqiao and Zhuang, Yueting and Zhu, Jianke and Bing, Lidong}, title = {VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18970-18980} }
HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Chenxin and Su, Shiqian and Zhu, Xizhou and Zhang, Chenyu and Chen, Zhe and Liu, Jiawen and Wang, Wenhai and Lu, Lewei and Huang, Gao and Qiao, Yu and Dai, Jifeng}, title = {HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14559-14569} }
Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maiti_2025_CVPR, author = {Maiti, Shalini and Agapito, Lourdes and Kokkinos, Filippos}, title = {Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18552-18562} }
SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Qi and Lao, Jiangwei and Ji, Deyi and Luo, Junwei and Wu, Kang and Zhang, Yingying and Ru, Lixiang and Wang, Jian and Chen, Jingdong and Yang, Ming and Liu, Dong and Zhao, Feng}, title = {SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14733-14744} }
AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Jingyi and Yang, Jintao and Luo, Zhunchen and Cao, Yunbo and Gao, Qiang and Zhang, Mengyuan and Hu, Wenpeng}, title = {AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19758-19768} }
LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanlin and Ouyang, Hao and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Chen, Qifeng and Shen, Yujun and Wang, Limin}, title = {LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12490-12500} }
SapiensID: Foundation for Human Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Minchul and Ye, Dingqiang and Su, Yiyang and Liu, Feng and Liu, Xiaoming}, title = {SapiensID: Foundation for Human Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13937-13947} }
Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Jie and Qu, Xiaoye and Lu, Zhenyi and Wei, Wei and Liu, Sichen and Cheng, Yu}, title = {Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12512-12521} }
FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Hang and Ma, Xiaoxuan and Ci, Hai and Zhu, Wentao and Wang, Yizhou}, title = {FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15987-15997} }
InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Haijie and Wu, Yanmin and Meng, Jiarui and Gao, Qiankun and Zhang, Zhiyao and Wang, Ronggang and Zhang, Jian}, title = {InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14078-14088} }
CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_CVPR, author = {Ding, Zhenhui and Chen, Guilian and Zhang, Qin and Wu, Huisi and Qin, Jing}, title = {CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15632-15641} }
VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Haiming and Zhou, Wending and Zhu, Yiyao and Yan, Xu and Gao, Jiantao and Bai, Dongfeng and Cai, Yingjie and Liu, Bingbing and Cui, Shuguang and Li, Zhen}, title = {VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17165-17175} }
Detecting Adversarial Data Using Perturbation Forgery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qian and Li, Chen and Luo, Yuchen and Ling, Hefei and Huang, Shijuan and Jia, Ruoxi and Yu, Ning}, title = {Detecting Adversarial Data Using Perturbation Forgery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13917-13926} }
CoA: Towards Real Image Dehazing via Compression-and-Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Long and Feng, Yuxin and Zhang, Yan and Liu, Jinyuan and Wang, Weimin and Chen, Guang-Yong and Xu, Chengpei and Su, Zhuo}, title = {CoA: Towards Real Image Dehazing via Compression-and-Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11197-11206} }
TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Cheng and Sui, Yang and Xiao, Jinqi and Huang, Lingyi and Gong, Yu and Li, Chendi and Yan, Jinghua and Bai, Yu and Sadayappan, Ponnuswamy and Hu, Xia and Yuan, Bo}, title = {TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19803-19813} }
Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhui and Ou, Liangxun and Fu, Qiang and Amata, Hadi and Heidrich, Wolfgang and Peng, Yifan}, title = {Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15833-15842} }
MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jianwen and Lin, Gaojie and Rong, Zhengkun and Liang, Chao and Zhu, Yongming and Yang, Jiaqi and Zhong, Tianyun}, title = {MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15920-15929} }
Light3R-SfM: Towards Feed-forward Structure-from-Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Elflein_2025_CVPR, author = {Elflein, Sven and Zhou, Qunjie and Leal-Taix{\'e}, Laura}, title = {Light3R-SfM: Towards Feed-forward Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16774-16784} }
Robotic Visual Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yanbang and Gong, Ziyang and Li, Haoyang and Huang, Xiaoqi and Kang, Haolan and Bai, Guangping and Ma, Xianzheng}, title = {Robotic Visual Instruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12155-12165} }
MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Murai_2025_CVPR, author = {Murai, Riku and Dexheimer, Eric and Davison, Andrew J.}, title = {MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16695-16705} }
Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Mi and Xue, Zihui and Dimakis, Alex and Grauman, Kristen}, title = {Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15802-15812} }
Cross-modal Information Flow in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhi and Yadav, Srishti and Han, Fengze and Shutova, Ekaterina}, title = {Cross-modal Information Flow in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19781-19791} }
Keyframe-Guided Creative Video Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Yuwei and Yang, Ceyuan and Rao, Anyi and Meng, Chenlin and Bar-Tal, Omer and Ding, Shuangrui and Agrawala, Maneesh and Lin, Dahua and Dai, Bo}, title = {Keyframe-Guided Creative Video Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13009-13020} }
EdgeTAM: On-Device Track Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chong and Zhu, Chenchen and Xiong, Yunyang and Suri, Saksham and Xiao, Fanyi and Wu, Lemeng and Krishnamoorthi, Raghuraman and Dai, Bo and Loy, Chen Change and Chandra, Vikas and Soran, Bilge}, title = {EdgeTAM: On-Device Track Anything Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13832-13842} }
EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soni_2025_CVPR, author = {Soni, Sagar and Dudhane, Akshay and Debary, Hiyam and Fiaz, Mustansar and Munir, Muhammad Akhtar and Danish, Muhammad Sohail and Fraccaro, Paolo and Watson, Campbell D. and Klein, Levente J. and Khan, Fahad Shahbaz and Khan, Salman}, title = {EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14303-14313} }
Video Summarization with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Min Jung and Gong, Dayoung and Cho, Minsu}, title = {Video Summarization with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18981-18991} }
Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2025_CVPR, author = {Khan, Mohd Hozaifa and Sarvadevabhatla, Ravi Kiran}, title = {Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18176-18186} }
Consistency-aware Self-Training for Iterative-based Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jingyi and Ye, Peng and Zhang, Haoyu and Yuan, Jiakang and Qiang, Rao and YangChenXu, Liu and Cailin, Wu and Xu, Feng and Chen, Tao}, title = {Consistency-aware Self-Training for Iterative-based Stereo Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16641-16650} }
MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Peijie and Li, Zhong-Zhi and Yin, Fei and Ran, Dekang and Liu, Cheng-Lin}, title = {MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19541-19551} }
Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Zhaochong and Sun, Guolei and Liu, Yun and Li, Runjia and Han, Junlin and Konukoglu, Ender and Belongie, Serge}, title = {Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16997-17007} }
Back