CVPR 2025 Open Access Repository

Papers

Back
DynScene: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied AI: Sangmin Lee,

Sungyong Park,

Heewon Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sangmin and Park, Sungyong and Kim, Heewon},
  title     = {{DynScene}: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12166--12175},
}
DiffLocks: Generating 3D Hair from a Single Image using Diffusion Models: Radu Alexandru Rosu,

Keyu Wu,

Yao Feng,

Youyi Zheng,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosu_2025_CVPR,
  author    = {Rosu, Radu Alexandru and Wu, Keyu and Feng, Yao and Zheng, Youyi and Black, Michael J.},
  title     = {{DiffLocks}: Generating {3D} Hair from a Single Image using Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10847--10857},
}
Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models: Zhenguang Liu,

Chao Shuai,

Shaojing Fan,

Ziping Dong,

Jinwu Hu,

Zhongjie Ba,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenguang and Shuai, Chao and Fan, Shaojing and Dong, Ziping and Hu, Jinwu and Ba, Zhongjie and Ren, Kui},
  title     = {Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18653--18662},
}
IDEA-Bench: How Far are Generative Models from Professional Designing?: Chen Liang,

Lianghua Huang,

Jingwu Fang,

Huanzhang Dou,

Wei Wang,

Zhi-Fan Wu,

Yupeng Shi,

Junge Zhang,

Xin Zhao,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Chen and Huang, Lianghua and Fang, Jingwu and Dou, Huanzhang and Wang, Wei and Wu, Zhi-Fan and Shi, Yupeng and Zhang, Junge and Zhao, Xin and Liu, Yu},
  title     = {{IDEA-Bench}: How Far are Generative Models from Professional Designing?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18541--18551},
}
PhD: A ChatGPT-Prompted Visual Hallucination Evaluation Dataset: Jiazhen Liu,

Yuhan Fu,

Ruobing Xie,

Runquan Xie,

Xingwu Sun,

Fengzong Lian,

Zhanhui Kang,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_PhD": the bare key Liu_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Liu_2025_CVPR_PhD,
  author    = {Liu, Jiazhen and Fu, Yuhan and Xie, Ruobing and Xie, Runquan and Sun, Xingwu and Lian, Fengzong and Kang, Zhanhui and Li, Xirong},
  title     = {{PhD}: A {ChatGPT}-Prompted Visual Hallucination Evaluation Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19857--19866},
}
ClimbingCap: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate: Ming Yan,

Xincheng Lin,

Yuhua Luo,

Shuqi Fan,

Yudi Dai,

Qixin Zhong,

Lincai Zhong,

Yuexin Ma,

Lan Xu,

Chenglu Wen,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ming and Lin, Xincheng and Luo, Yuhua and Fan, Shuqi and Dai, Yudi and Zhong, Qixin and Zhong, Lincai and Ma, Yuexin and Xu, Lan and Wen, Chenglu and Shen, Siqi and Wang, Cheng},
  title     = {{ClimbingCap}: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12312--12323},
}
A Bias-Free Training Paradigm for More General AI-generated Image Detection: Fabrizio Guillaro,

Giada Zingarini,

Ben Usman,

Avneesh Sud,

Davide Cozzolino,

Luisa Verdoliva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guillaro_2025_CVPR,
  author    = {Guillaro, Fabrizio and Zingarini, Giada and Usman, Ben and Sud, Avneesh and Cozzolino, Davide and Verdoliva, Luisa},
  title     = {A Bias-Free Training Paradigm for More General {AI}-generated Image Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18685--18694},
}
FALCON: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding: Thanh-Dat Truong,

Utsav Prabhu,

Bhiksha Raj,

Jackson Cothren,

Khoa Luu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Truong_2025_CVPR,
  author    = {Truong, Thanh-Dat and Prabhu, Utsav and Raj, Bhiksha and Cothren, Jackson and Luu, Khoa},
  title     = {{FALCON}: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15065--15075},
}
Certified Human Trajectory Prediction: Mohammadhossein Bahari,

Saeed Saadatnejad,

Amirhossein Askari Farsangi,

Seyed-Mohsen Moosavi-Dezfooli,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahari_2025_CVPR,
  author    = {Bahari, Mohammadhossein and Saadatnejad, Saeed and Farsangi, Amirhossein Askari and Moosavi-Dezfooli, Seyed-Mohsen and Alahi, Alexandre},
  title     = {Certified Human Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12301--12311},
}
Transformers without Normalization: Jiachen Zhu,

Xinlei Chen,

Kaiming He,

Yann LeCun,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jiachen and Chen, Xinlei and He, Kaiming and LeCun, Yann and Liu, Zhuang},
  title     = {Transformers without Normalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14901--14911},
}
HiPART: Hierarchical Pose AutoRegressive Transformer for Occluded 3D Human Pose Estimation: Hongwei Zheng,

Han Li,

Wenrui Dai,

Ziyang Zheng,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hongwei and Li, Han and Dai, Wenrui and Zheng, Ziyang and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {{HiPART}: Hierarchical Pose {AutoRegressive} Transformer for Occluded {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16807--16817},
}
From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech: Ji-Hoon Kim,

Jeongsoo Choi,

Jaehun Kim,

Chaeyoung Jung,

Joon Son Chung; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ji-Hoon and Choi, Jeongsoo and Kim, Jaehun and Jung, Chaeyoung and Chung, Joon Son},
  title     = {From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15874--15884},
}
DFM: Differentiable Feature Matching for Anomaly Detection: Sheng Wu,

Yimi Wang,

Xudong Liu,

Yuguang Yang,

Runqi Wang,

Guodong Guo,

David Doermann,

Baochang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sheng and Wang, Yimi and Liu, Xudong and Yang, Yuguang and Wang, Runqi and Guo, Guodong and Doermann, David and Zhang, Baochang},
  title     = {{DFM}: Differentiable Feature Matching for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15224--15233},
}
PointSR: Self-Regularized Point Supervision for Drone-View Object Detection: Weizhuo Li,

Yue Xi,

Wenjing Jia,

Zehao Zhang,

Fei Li,

Xiangzeng Liu,

Qiguang Miao; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weizhuo and Xi, Yue and Jia, Wenjing and Zhang, Zehao and Li, Fei and Liu, Xiangzeng and Miao, Qiguang},
  title     = {{PointSR}: Self-Regularized Point Supervision for Drone-View Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11707--11716},
}
v-CLR: View-Consistent Learning for Open-World Instance Segmentation: Chang-Bin Zhang,

Jinhong Ni,

Yujie Zhong,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Ni, Jinhong and Zhong, Yujie and Han, Kai},
  title     = {{v-CLR}: View-Consistent Learning for Open-World Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20307--20317},
}
Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization: Siyan Dong,

Shuzhe Wang,

Shaohui Liu,

Lulu Cai,

Qingnan Fan,

Juho Kannala,

Yanchao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Siyan and Wang, Shuzhe and Liu, Shaohui and Cai, Lulu and Fan, Qingnan and Kannala, Juho and Yang, Yanchao},
  title     = {{Reloc3r}: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16739--16752},
}
Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation: Chengyue Wu,

Xiaokang Chen,

Zhiyu Wu,

Yiyang Ma,

Xingchao Liu,

Zizheng Pan,

Wen Liu,

Zhenda Xie,

Xingkai Yu,

Chong Ruan,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_Janus": the bare key Wu_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Wu_2025_CVPR_Janus,
  author    = {Wu, Chengyue and Chen, Xiaokang and Wu, Zhiyu and Ma, Yiyang and Liu, Xingchao and Pan, Zizheng and Liu, Wen and Xie, Zhenda and Yu, Xingkai and Ruan, Chong and Luo, Ping},
  title     = {{Janus}: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12966--12977},
}
MagicArticulate: Make Your 3D Models Articulation-Ready: Chaoyue Song,

Jianfeng Zhang,

Xiu Li,

Fan Yang,

Yiwen Chen,

Zhongcong Xu,

Jun Hao Liew,

Xiaoyang Guo,

Fayao Liu,

Jiashi Feng,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Chaoyue and Zhang, Jianfeng and Li, Xiu and Yang, Fan and Chen, Yiwen and Xu, Zhongcong and Liew, Jun Hao and Guo, Xiaoyang and Liu, Fayao and Feng, Jiashi and Lin, Guosheng},
  title     = {{MagicArticulate}: Make Your {3D} Models Articulation-Ready},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15998--16007},
}
Dual Prompting Image Restoration with Diffusion Transformers: Dehong Kong,

Fan Li,

Zhixin Wang,

Jiaqi Xu,

Renjing Pei,

Wenbo Li,

WenQi Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Dehong and Li, Fan and Wang, Zhixin and Xu, Jiaqi and Pei, Renjing and Li, Wenbo and Ren, WenQi},
  title     = {Dual Prompting Image Restoration with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12809--12819},
}
DepthCues: Evaluating Monocular Depth Perception in Large Vision Models: Duolikun Danier,

Mehmet Aygün,

Changjian Li,

Hakan Bilen,

Oisin Mac Aodha; [pdf] [supp]
[bibtex]
@InProceedings{Danier_2025_CVPR,
  author    = {Danier, Duolikun and Ayg{\"u}n, Mehmet and Li, Changjian and Bilen, Hakan and Mac Aodha, Oisin},
  title     = {{DepthCues}: Evaluating Monocular Depth Perception in Large Vision Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20049--20059},
}
SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting: Jiajun Tang,

Fan Fei,

Zhihao Li,

Xiao Tang,

Shiyong Liu,

Youyu Chen,

Binxiao Huang,

Zhenyu Chen,

Xiaofei Wu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiajun and Fei, Fan and Li, Zhihao and Tang, Xiao and Liu, Shiyong and Chen, Youyu and Huang, Binxiao and Chen, Zhenyu and Wu, Xiaofei and Shi, Boxin},
  title     = {{SpecTRe-GS}: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16133--16142},
}
AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360° Unbounded Scene Inpainting: Chung-Ho Wu,

Yang-Jung Chen,

Ying-Huan Chen,

Jie-Ying Lee,

Bo-Hsu Ke,

Chun-Wei Tuan Mu,

Yi-Chuan Huang,

Chin-Yang Lin,

Min-Hung Chen,

Yen-Yu Lin,

Yu-Lun Liu; [pdf] [supp]
[bibtex]
% Key suffixed with "_AuraFusion360": the bare key Wu_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
% The "360deg" in the exported title is restored as a degree sign.
@InProceedings{Wu_2025_CVPR_AuraFusion360,
  author    = {Wu, Chung-Ho and Chen, Yang-Jung and Chen, Ying-Huan and Lee, Jie-Ying and Ke, Bo-Hsu and Mu, Chun-Wei Tuan and Huang, Yi-Chuan and Lin, Chin-Yang and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun},
  title     = {{AuraFusion360}: Augmented Unseen Region Alignment for Reference-based 360{$^\circ$} Unbounded Scene Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16366--16376},
}
Language-Guided Image Tokenization for Generation: Kaiwen Zha,

Lijun Yu,

Alireza Fathi,

David A. Ross,

Cordelia Schmid,

Dina Katabi,

Xiuye Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Kaiwen and Yu, Lijun and Fathi, Alireza and Ross, David A. and Schmid, Cordelia and Katabi, Dina and Gu, Xiuye},
  title     = {Language-Guided Image Tokenization for Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15713--15722},
}
Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation: Tanuj Sur,

Samrat Mukherjee,

Kaizer Rahaman,

Subhasis Chaudhuri,

Muhammad Haris Khan,

Biplab Banerjee; [pdf] [supp]
[bibtex]
@InProceedings{Sur_2025_CVPR,
  author    = {Sur, Tanuj and Mukherjee, Samrat and Rahaman, Kaizer and Chaudhuri, Subhasis and Khan, Muhammad Haris and Banerjee, Biplab},
  title     = {Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11810--11821},
}
D^3-Human: Dynamic Disentangled Digital Human from Monocular Video: Honghu Chen,

Bo Peng,

Yunfan Tao,

Juyong Zhang; [pdf] [supp]
[bibtex]
% The exported title spelled the superscript as a literal caret (textasciicircum); it is typeset as a real superscript here.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
  title     = {{D$^3$-Human}: Dynamic Disentangled Digital Human from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10836--10846},
}
Curriculum Coarse-to-Fine Selection for High-IPC Dataset Distillation: Yanda Chen,

Gongwei Chen,

Miao Zhang,

Weili Guan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_Curriculum": the bare key Chen_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Chen_2025_CVPR_Curriculum,
  author    = {Chen, Yanda and Chen, Gongwei and Zhang, Miao and Guan, Weili and Nie, Liqiang},
  title     = {Curriculum Coarse-to-Fine Selection for High-{IPC} Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20437--20446},
}
BADGR: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction: Yuguang Li,

Ivaylo Boyadzhiev,

Zixuan Liu,

Linda Shapiro,

Alex Colburn; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_BADGR": the bare key Li_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Li_2025_CVPR_BADGR,
  author    = {Li, Yuguang and Boyadzhiev, Ivaylo and Liu, Zixuan and Shapiro, Linda and Colburn, Alex},
  title     = {{BADGR}: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16785--16795},
}
Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion: Jongseong Bae,

Junwoo Ha,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Jongseong and Ha, Junwoo and Kim, Ha Young},
  title     = {Three Cars Approaching within 100m! {Enhancing} Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11939--11948},
}
DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension: Xiaofu Chen,

Yaxin Luo,

Gen Luo,

Jiayi Ji,

Henghui Ding,

Yiyi Zhou; [pdf]
[bibtex]
% Key suffixed with "_DViN": the bare key Chen_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Chen_2025_CVPR_DViN,
  author    = {Chen, Xiaofu and Luo, Yaxin and Luo, Gen and Ji, Jiayi and Ding, Henghui and Zhou, Yiyi},
  title     = {{DViN}: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14347--14357},
}
Spiking Transformer with Spatial-Temporal Attention: Donghyun Lee,

Yuhang Li,

Youngeun Kim,

Shiting Xiao,

Priyadarshini Panda; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_Spiking": the bare key Lee_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Lee_2025_CVPR_Spiking,
  author    = {Lee, Donghyun and Li, Yuhang and Kim, Youngeun and Xiao, Shiting and Panda, Priyadarshini},
  title     = {Spiking Transformer with Spatial-Temporal Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13948--13958},
}
Perceptual Video Compression with Neural Wrapping: Muhammad Umar Karim Khan,

Aaron Chadha,

Mohammad Ashraful Anam,

Yiannis Andreopoulos; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2025_CVPR,
  author    = {Khan, Muhammad Umar Karim and Chadha, Aaron and Anam, Mohammad Ashraful and Andreopoulos, Yiannis},
  title     = {Perceptual Video Compression with Neural Wrapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17743--17754},
}
ViKIENet: Towards Efficient 3D Object Detection with Virtual Key Instance Enhanced Network: Zhuochen Yu,

Bijie Qiu,

Andy W. H. Khong; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhuochen and Qiu, Bijie and Khong, Andy W. H.},
  title     = {{ViKIENet}: Towards Efficient {3D} Object Detection with Virtual Key Instance Enhanced Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11844--11853},
}
Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior: Chanhui Lee,

Yeonghwan Song,

Jeany Son; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_DataFree": the bare key Lee_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Lee_2025_CVPR_DataFree,
  author    = {Lee, Chanhui and Song, Yeonghwan and Son, Jeany},
  title     = {Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13907--13916},
}
FRAME: Floor-aligned Representation for Avatar Motion from Egocentric Video: Andrea Boscolo Camiletto,

Jian Wang,

Eduardo Alvarado,

Rishabh Dabral,

Thabo Beeler,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Camiletto_2025_CVPR,
  author    = {Camiletto, Andrea Boscolo and Wang, Jian and Alvarado, Eduardo and Dabral, Rishabh and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{FRAME}: Floor-aligned Representation for Avatar Motion from Egocentric Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17497--17507},
}
Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation: Libiao Chen,

Dong Nie,

Junjun Pan,

Jing Yan,

Zhenyu Tang; [pdf]
[bibtex]
% Key suffixed with "_Generalized": the bare key Chen_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Chen_2025_CVPR_Generalized,
  author    = {Chen, Libiao and Nie, Dong and Pan, Junjun and Yan, Jing and Tang, Zhenyu},
  title     = {Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20286--20295},
}
Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural ODEs: Sijie Wang,

Rui She,

Qiyu Kang,

Siqi Li,

Disheng Li,

Tianyu Geng,

Shangshu Yu,

Wee Peng Tay; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sijie and She, Rui and Kang, Qiyu and Li, Siqi and Li, Disheng and Geng, Tianyu and Yu, Shangshu and Tay, Wee Peng},
  title     = {Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural {ODEs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11717--11728},
}
MaDCoW: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects: Kevin Zhang,

Jia-Bin Huang,

Jose Echevarria,

Stephen DiVerdi,

Aaron Hertzmann; [pdf] [supp]
[bibtex]
% Key suffixed with "_MaDCoW": the bare key Zhang_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Zhang_2025_CVPR_MaDCoW,
  author    = {Zhang, Kevin and Huang, Jia-Bin and Echevarria, Jose and DiVerdi, Stephen and Hertzmann, Aaron},
  title     = {{MaDCoW}: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10923--10932},
}
Any6D: Model-free 6D Pose Estimation of Novel Objects: Taeyeop Lee,

Bowen Wen,

Minjun Kang,

Gyuree Kang,

In So Kweon,

Kuk-Jin Yoon; [pdf] [arXiv]
[bibtex]
% Key suffixed with "_Any6D": the bare key Lee_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Lee_2025_CVPR_Any6D,
  author    = {Lee, Taeyeop and Wen, Bowen and Kang, Minjun and Kang, Gyuree and Kweon, In So and Yoon, Kuk-Jin},
  title     = {{Any6D}: Model-free {6D} Pose Estimation of Novel Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11633--11643},
}
DrVideo: Document Retrieval Based Long Video Understanding: Ziyu Ma,

Chenhui Gou,

Hengcan Shi,

Bin Sun,

Shutao Li,

Hamid Rezatofighi,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Ziyu and Gou, Chenhui and Shi, Hengcan and Sun, Bin and Li, Shutao and Rezatofighi, Hamid and Cai, Jianfei},
  title     = {{DrVideo}: Document Retrieval Based Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18936--18946},
}
Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors: Zhengfei Kuang,

Tianyuan Zhang,

Kai Zhang,

Hao Tan,

Sai Bi,

Yiwei Hu,

Zexiang Xu,

Milos Hasan,

Gordon Wetzstein,

Fujun Luan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2025_CVPR,
  author    = {Kuang, Zhengfei and Zhang, Tianyuan and Zhang, Kai and Tan, Hao and Bi, Sai and Hu, Yiwei and Xu, Zexiang and Hasan, Milos and Wetzstein, Gordon and Luan, Fujun},
  title     = {Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17660--17670},
}
PSHuman: Photorealistic Single-image 3D Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing: Peng Li,

Wangguandong Zheng,

Yuan Liu,

Tao Yu,

Yangguang Li,

Xingqun Qi,

Xiaowei Chi,

Siyu Xia,

Yan-Pei Cao,

Wei Xue,

Wenhan Luo,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_PSHuman": the bare key Li_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Li_2025_CVPR_PSHuman,
  author    = {Li, Peng and Zheng, Wangguandong and Liu, Yuan and Yu, Tao and Li, Yangguang and Qi, Xingqun and Chi, Xiaowei and Xia, Siyu and Cao, Yan-Pei and Xue, Wei and Luo, Wenhan and Guo, Yike},
  title     = {{PSHuman}: Photorealistic Single-image {3D} Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16008--16018},
}
Hiding Images in Diffusion Models by Editing Learned Score Functions: Haoyu Chen,

Yunqiao Yang,

Nan Zhong,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_Hiding": the bare key Chen_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Chen_2025_CVPR_Hiding,
  author    = {Chen, Haoyu and Yang, Yunqiao and Zhong, Nan and Ma, Kede},
  title     = {Hiding Images in Diffusion Models by Editing Learned Score Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18663--18673},
}
WeatherGen: A Unified Diverse Weather Generator for LiDAR Point Clouds via Spider Mamba Diffusion: Yang Wu,

Yun Zhu,

Kaihua Zhang,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_WeatherGen": the bare key Wu_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Wu_2025_CVPR_WeatherGen,
  author    = {Wu, Yang and Zhu, Yun and Zhang, Kaihua and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {{WeatherGen}: A Unified Diverse Weather Generator for {LiDAR} Point Clouds via Spider {Mamba} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17019--17028},
}
MUST: The First Dataset and Unified Framework for Multispectral UAV Single Object Tracking: Haolin Qin,

Tingfa Xu,

Tianhao Li,

Zhenxiang Chen,

Tao Feng,

Jianan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Haolin and Xu, Tingfa and Li, Tianhao and Chen, Zhenxiang and Feng, Tao and Li, Jianan},
  title     = {{MUST}: The First Dataset and Unified Framework for Multispectral {UAV} Single Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16882--16891},
}
Tightening Robustness Verification of MaxPool-based Neural Networks via Minimizing the Over-Approximation Zone: Yuan Xiao,

Yuchen Chen,

Shiqing Ma,

Chunrong Fang,

Tongtong Bai,

Mingzheng Gu,

Yuxin Cheng,

Yanwei Chen,

Zhenyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yuan and Chen, Yuchen and Ma, Shiqing and Fang, Chunrong and Bai, Tongtong and Gu, Mingzheng and Cheng, Yuxin and Chen, Yanwei and Chen, Zhenyu},
  title     = {Tightening Robustness Verification of {MaxPool}-based Neural Networks via Minimizing the Over-Approximation Zone},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20695--20705},
}
PhysicsGen: Can Generative Models Learn from Images to Predict Complex Physical Relations?: Martin Spitznagel,

Jan Vaillant,

Janis Keuper; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spitznagel_2025_CVPR,
  author    = {Spitznagel, Martin and Vaillant, Jan and Keuper, Janis},
  title     = {{PhysicsGen}: Can Generative Models Learn from Images to Predict Complex Physical Relations?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11125--11134},
}
Spectral Informed Mamba for Robust Point Cloud Processing: Ali Bahri,

Moslem Yazdanpanah,

Mehrdad Noori,

Sahar Dastani,

Milad Cheraghalikhani,

Gustavo Adolfo Vargas Hakim,

David Osowiechi,

Farzad Beizaee,

Ismail Ben Ayed,

Christian Desrosiers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahri_2025_CVPR,
  author    = {Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Dastani, Sahar and Cheraghalikhani, Milad and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Beizaee, Farzad and Ben Ayed, Ismail and Desrosiers, Christian},
  title     = {Spectral Informed {Mamba} for Robust Point Cloud Processing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11799--11809},
}
BlobGEN-Vid: Compositional Text-to-Video Generation with Blob Video Representations: Weixi Feng,

Chao Liu,

Sifei Liu,

William Yang Wang,

Arash Vahdat,

Weili Nie; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Weixi and Liu, Chao and Liu, Sifei and Wang, William Yang and Vahdat, Arash and Nie, Weili},
  title     = {{BlobGEN-Vid}: Compositional Text-to-Video Generation with Blob Video Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12989--12998},
}
D2SP: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition.: Haoran Wang,

Xinji Mai,

Zeng Tao,

Xuan Tong,

Junxiong Lin,

Yan Wang,

Jiawen Yu,

Shaoqi Yan,

Ziheng Zhou,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_D2SP": the bare key Wang_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Wang_2025_CVPR_D2SP,
  author    = {Wang, Haoran and Mai, Xinji and Tao, Zeng and Tong, Xuan and Lin, Junxiong and Wang, Yan and Yu, Jiawen and Yan, Shaoqi and Zhou, Ziheng and Zhang, Wenqiang},
  title     = {{D2SP}: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition.},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19218--19229},
}
LaVin-DiT: Large Vision Diffusion Transformer: Zhaoqing Wang,

Xiaobo Xia,

Runnan Chen,

Dongdong Yu,

Changhu Wang,

Mingming Gong,

Tongliang Liu; [pdf] [supp]
[bibtex]
% Key suffixed with "_LaVinDiT": the bare key Wang_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Wang_2025_CVPR_LaVinDiT,
  author    = {Wang, Zhaoqing and Xia, Xiaobo and Chen, Runnan and Yu, Dongdong and Wang, Changhu and Gong, Mingming and Liu, Tongliang},
  title     = {{LaVin-DiT}: Large Vision Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20060--20070},
}
CAP-Net: A Unified Network for 6D Pose and Size Estimation of Categorical Articulated Parts from a Single RGB-D Image: Jingshun Huang,

Haitao Lin,

Tianyu Wang,

Yanwei Fu,

Xiangyang Xue,

Yi Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jingshun and Lin, Haitao and Wang, Tianyu and Fu, Yanwei and Xue, Xiangyang and Zhu, Yi},
  title     = {{CAP-Net}: A Unified Network for {6D} Pose and Size Estimation of Categorical Articulated Parts from a Single {RGB-D} Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11654--11664},
}
Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization: Zhanhao Liang,

Yuhui Yuan,

Shuyang Gu,

Bohan Chen,

Tiankai Hang,

Mingxi Cheng,

Ji Li,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
% Key suffixed with "_Aesthetic": the bare key Liang_2025_CVPR is already used by an earlier entry in this file, and BibTeX drops repeated keys.
@InProceedings{Liang_2025_CVPR_Aesthetic,
  author    = {Liang, Zhanhao and Yuan, Yuhui and Gu, Shuyang and Chen, Bohan and Hang, Tiankai and Cheng, Mingxi and Li, Ji and Zheng, Liang},
  title     = {Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13199--13208},
}
BARD-GS: Blur-Aware Reconstruction of Dynamic Scenes via Gaussian Splatting: Yiren Lu,

Yunlai Zhou,

Disheng Liu,

Tuo Liang,

Yu Yin; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yiren and Zhou, Yunlai and Liu, Disheng and Liang, Tuo and Yin, Yu},
  title     = {{BARD-GS}: Blur-Aware Reconstruction of Dynamic Scenes via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16532--16542},
}
DiN: Diffusion Model for Robust Medical VQA with Semantic Noisy Labels: Erjian Guo,

Zhen Zhao,

Zicheng Wang,

Tong Chen,

Yunyi Liu,

Luping Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Erjian and Zhao, Zhen and Wang, Zicheng and Chen, Tong and Liu, Yunyi and Zhou, Luping},
  title     = {{DiN}: Diffusion Model for Robust Medical {VQA} with Semantic Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14337--14346},
}
S^3-Face: SSS-Compliant Facial Reflectance Estimation via Diffusion Priors: Xingyu Ren,

Jiankang Deng,

Yuhao Cheng,

Wenhan Zhu,

Yichao Yan,

Xiaokang Yang,

Stefanos Zafeiriou,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Zhu, Wenhan and Yan, Yichao and Yang, Xiaokang and Zafeiriou, Stefanos and Ma, Chao},
  title     = {{S$^3$-Face}: {SSS}-Compliant Facial Reflectance Estimation via Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16051--16060},
}
FSBench: A Figure Skating Benchmark for Advancing Artistic Sports Understanding: Rong Gao,

Xin Liu,

Zhuozhao Hu,

Bohao Xing,

Baiqiang Xia,

Zitong Yu,

Heikki Kälviäinen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Rong and Liu, Xin and Hu, Zhuozhao and Xing, Bohao and Xia, Baiqiang and Yu, Zitong and K{\"a}lvi{\"a}inen, Heikki},
  title     = {{FSBench}: A Figure Skating Benchmark for Advancing Artistic Sports Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13595--13605},
}
Keep the Balance: A Parameter-Efficient Symmetrical Framework for RGB+X Semantic Segmentation: Jiaxin Cai,

Jingze Su,

Qi Li,

Wenjie Yang,

Shu Wang,

Tiesong Zhao,

Shengfeng He,

Wenxi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Jiaxin and Su, Jingze and Li, Qi and Yang, Wenjie and Wang, Shu and Zhao, Tiesong and He, Shengfeng and Liu, Wenxi},
  title     = {Keep the Balance: A Parameter-Efficient Symmetrical Framework for {RGB+X} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10587--10598},
}
LLM-driven Multimodal and Multi-Identity Listening Head Generation: Peiwen Lai,

Weizhi Zhong,

Yipeng Qin,

Xiaohang Ren,

Baoyuan Wang,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Peiwen and Zhong, Weizhi and Qin, Yipeng and Ren, Xiaohang and Wang, Baoyuan and Li, Guanbin},
  title     = {{LLM}-driven Multimodal and Multi-Identity Listening Head Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10656--10666},
}
OffsetOPT: Explicit Surface Reconstruction without Normals: Huan Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Huan},
  title     = {{OffsetOPT}: Explicit Surface Reconstruction without Normals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11729--11738},
}
Any-Resolution AI-Generated Image Detection by Spectral Learning: Dimitrios Karageorgiou,

Symeon Papadopoulos,

Ioannis Kompatsiaris,

Efstratios Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karageorgiou_2025_CVPR,
  author    = {Karageorgiou, Dimitrios and Papadopoulos, Symeon and Kompatsiaris, Ioannis and Gavves, Efstratios},
  title     = {Any-Resolution {AI}-Generated Image Detection by Spectral Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18706--18717},
}
STOP: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding: Zichen Liu,

Kunlun Xu,

Bing Su,

Xu Zou,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zichen and Xu, Kunlun and Su, Bing and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{STOP}: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13776--13786},
}
TimeTracker: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion: Haoyue Liu,

Jinghan Xu,

Yi Chang,

Hanyu Zhou,

Haozhi Zhao,

Lin Wang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haoyue and Xu, Jinghan and Chang, Yi and Zhou, Hanyu and Zhao, Haozhi and Wang, Lin and Yan, Luxin},
  title     = {{TimeTracker}: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17649--17659},
}
Shading Meets Motion: Self-supervised Indoor 3D Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion: Guoyu Lu; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Guoyu},
  title     = {Shading Meets Motion: Self-supervised Indoor {3D} Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16508--16519},
}
Believing is Seeing: Unobserved Object Detection using Generative Models: Subhransu S. Bhattacharjee,

Dylan Campbell,

Rahul Shome; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhattacharjee_2025_CVPR,
  author    = {Bhattacharjee, Subhransu S. and Campbell, Dylan and Shome, Rahul},
  title     = {Believing is Seeing: Unobserved Object Detection using Generative Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19366--19377},
}
NLPrompt: Noise-Label Prompt Learning for Vision-Language Models: Bikang Pan,

Qun Li,

Xiaoying Tang,

Wei Huang,

Zhen Fang,

Feng Liu,

Jingya Wang,

Jingyi Yu,

Ye Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Bikang and Li, Qun and Tang, Xiaoying and Huang, Wei and Fang, Zhen and Liu, Feng and Wang, Jingya and Yu, Jingyi and Shi, Ye},
  title     = {{NLPrompt}: Noise-Label Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19963--19973},
}
PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields: Sean Wu,

Shamik Basu,

Tim Broedermann,

Luc Van Gool,

Christos Sakaridis; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sean and Basu, Shamik and Broedermann, Tim and Van Gool, Luc and Sakaridis, Christos},
  title     = {{PBR-NeRF}: Inverse Rendering with Physics-Based Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10974--10984},
}
No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition: Rong Qin,

Xin Liu,

Xingyu Liu,

Jiaxuan Liu,

Jinglei Shi,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Rong and Liu, Xin and Liu, Xingyu and Liu, Jiaxuan and Shi, Jinglei and Lin, Liang and Yang, Jufeng},
  title     = {No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14965--14975},
}
ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models: Hao Yin,

Guangzong Si,

Zilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hao and Si, Guangzong and Wang, Zilei},
  title     = {{ClearSight}: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14625--14634},
}
TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion: Yiran Wang,

Jiaqi Li,

Chaoyi Hong,

Ruibo Li,

Liusheng Sun,

Xiao Song,

Zhe Wang,

Zhiguo Cao,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yiran and Li, Jiaqi and Hong, Chaoyi and Li, Ruibo and Sun, Liusheng and Song, Xiao and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {{TacoDepth}: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10523--10533},
}
Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment: Hiromu Taketsugu,

Takeru Oba,

Takahiro Maeda,

Shohei Nobuhara,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taketsugu_2025_CVPR,
  author    = {Taketsugu, Hiromu and Oba, Takeru and Maeda, Takahiro and Nobuhara, Shohei and Ukita, Norimichi},
  title     = {Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12324--12334},
}
AvatarArtist: Open-Domain 4D Avatarization: Hongyu Liu,

Xuan Wang,

Ziyu Wan,

Yue Ma,

Jingye Chen,

Yanbo Fan,

Yujun Shen,

Yibing Song,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongyu and Wang, Xuan and Wan, Ziyu and Ma, Yue and Chen, Jingye and Fan, Yanbo and Shen, Yujun and Song, Yibing and Chen, Qifeng},
  title     = {{AvatarArtist}: Open-Domain {4D} Avatarization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10758--10769},
}
Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing: Chen Liao,

Yan Shen,

Dan Li,

Zhongli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Chen and Shen, Yan and Li, Dan and Wang, Zhongli},
  title     = {Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18000--18010},
}
UniGoal: Towards Universal Zero-shot Goal-oriented Navigation: Hang Yin,

Xiuwei Xu,

Linqing Zhao,

Ziwei Wang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hang and Xu, Xiuwei and Zhao, Linqing and Wang, Ziwei and Zhou, Jie and Lu, Jiwen},
  title     = {{UniGoal}: Towards Universal Zero-shot Goal-oriented Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19057--19066},
}
Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation: Kunpeng Qiu,

Zhiqiang Gao,

Zhiying Zhou,

Mingjie Sun,

Yongxin Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Kunpeng and Gao, Zhiqiang and Zhou, Zhiying and Sun, Mingjie and Guo, Yongxin},
  title     = {Noise-Consistent {Siamese}-Diffusion for Medical Image Synthesis and Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15672--15681},
}
DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection: Jaewoo Song,

Daemin Park,

Kanghyun Baek,

Sangyub Lee,

Jooyoung Choi,

Eunji Kim,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Jaewoo and Park, Daemin and Baek, Kanghyun and Lee, Sangyub and Choi, Jooyoung and Kim, Eunji and Yoon, Sungroh},
  title     = {{DefectFill}: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18718--18727},
}
Less is More: Efficient Image Vectorization with Adaptive Parameterization: Kaibo Zhao,

Liang Bao,

Yufei Li,

Xu Su,

Ke Zhang,

Xiaotian Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Kaibo and Bao, Liang and Li, Yufei and Su, Xu and Zhang, Ke and Qiao, Xiaotian},
  title     = {Less is More: Efficient Image Vectorization with Adaptive Parameterization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18166--18175},
}
FedMIA: An Effective Membership Inference Attack Exploiting "All for One" Principle in Federated Learning: Gongxi Zhu,

Donghao Li,

Hanlin Gu,

Yuan Yao,

Lixin Fan,

Yuxing Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Gongxi and Li, Donghao and Gu, Hanlin and Yao, Yuan and Fan, Lixin and Han, Yuxing},
  title     = {{FedMIA}: An Effective Membership Inference Attack Exploiting ``All for One'' Principle in Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20643--20653},
}
DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework: Henrique Morimitsu,

Xiaobin Zhu,

Roberto M. Cesar,

Xiangyang Ji,

Xu-Cheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morimitsu_2025_CVPR,
  author    = {Morimitsu, Henrique and Zhu, Xiaobin and Cesar, Roberto M. and Ji, Xiangyang and Yin, Xu-Cheng},
  title     = {{DPFlow}: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17810--17820},
}
DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning: Xiao-Hui Li,

Fei Yin,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiao-Hui and Yin, Fei and Liu, Cheng-Lin},
  title     = {{DocSAM}: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15021--15032},
}
Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling: Junha Hyung,

Kinam Kim,

Susung Hong,

Min-Jung Kim,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hyung_2025_CVPR,
  author    = {Hyung, Junha and Kim, Kinam and Hong, Susung and Kim, Min-Jung and Choo, Jaegul},
  title     = {Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11006--11015},
}
ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models: Yahan Tu,

Rui Hu,

Jitao Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Yahan and Hu, Rui and Sang, Jitao},
  title     = {{ODE}: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19836--19845},
}
Masking meets Supervision: A Strong Learning Alliance: Byeongho Heo,

Taekyung Kim,

Sangdoo Yun,

Dongyoon Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2025_CVPR,
  author    = {Heo, Byeongho and Kim, Taekyung and Yun, Sangdoo and Han, Dongyoon},
  title     = {Masking meets Supervision: A Strong Learning Alliance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20447--20457},
}
DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation: Wang Zhao,

Yan-Pei Cao,

Jiale Xu,

Yuejiang Dong,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Wang and Cao, Yan-Pei and Xu, Jiale and Dong, Yuejiang and Shan, Ying},
  title     = {{DI-PCG}: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality {3D} Asset Creation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11061--11072},
}
Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering: Wenlong Fang,

Qiaofeng Wu,

Jing Chen,

Yun Xue; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Wenlong and Wu, Qiaofeng and Chen, Jing and Xue, Yun},
  title     = {Notes-guided {MLLM} Reasoning: Enhancing {MLLM} with Knowledge and Visual Notes for Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19597--19607},
}
UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior: I-Hsiang Chen,

Wei-Ting Chen,

Yu-Wei Liu,

Yuan-Chun Chiang,

Sy-Yen Kuo,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, I-Hsiang and Chen, Wei-Ting and Liu, Yu-Wei and Chiang, Yuan-Chun and Kuo, Sy-Yen and Yang, Ming-Hsuan},
  title     = {{UniRestore}: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17969--17979},
}
Condensing Action Segmentation Datasets via Generative Network Inversion: Guodong Ding,

Rongyu Chen,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Guodong and Chen, Rongyu and Yao, Angela},
  title     = {Condensing Action Segmentation Datasets via Generative Network Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17733--17742},
}
Can Generative Video Models Help Pose Estimation?: Ruojin Cai,

Jason Y. Zhang,

Philipp Henzler,

Zhengqi Li,

Noah Snavely,

Ricardo Martin-Brualla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Ruojin and Zhang, Jason Y. and Henzler, Philipp and Li, Zhengqi and Snavely, Noah and Martin-Brualla, Ricardo},
  title     = {Can Generative Video Models Help Pose Estimation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16764--16773},
}
DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving: Zhenhua Xu,

Yan Bai,

Yujia Zhang,

Zhuoling Li,

Fei Xia,

Kwan-Yee K. Wong,

Jianqiang Wang,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zhenhua and Bai, Yan and Zhang, Yujia and Li, Zhuoling and Xia, Fei and Wong, Kwan-Yee K. and Wang, Jianqiang and Zhao, Hengshuang},
  title     = {{DriveGPT4-V2}: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17261--17270},
}
High-Fidelity Lightweight Mesh Reconstruction from Point Clouds: Chen Zhang,

Wentao Wang,

Ximeng Li,

Xinyao Liao,

Wanjuan Su,

Wenbing Tao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chen and Wang, Wentao and Li, Ximeng and Liao, Xinyao and Su, Wanjuan and Tao, Wenbing},
  title     = {High-Fidelity Lightweight Mesh Reconstruction from Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11739--11748},
}
MDP: Multidimensional Vision Model Pruning with Latency Constraint: Xinglong Sun,

Barath Lakshmanan,

Maying Shen,

Shiyi Lan,

Jingde Chen,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xinglong and Lakshmanan, Barath and Shen, Maying and Lan, Shiyi and Chen, Jingde and Alvarez, Jose M.},
  title     = {{MDP}: Multidimensional Vision Model Pruning with Latency Constraint},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20113--20123},
}
OSDFace: One-Step Diffusion Model for Face Restoration: Jingkai Wang,

Jue Gong,

Lin Zhang,

Zheng Chen,

Xing Liu,

Hong Gu,

Yutong Liu,

Yulun Zhang,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jingkai and Gong, Jue and Zhang, Lin and Chen, Zheng and Liu, Xing and Gu, Hong and Liu, Yutong and Zhang, Yulun and Yang, Xiaokang},
  title     = {{OSDFace}: One-Step Diffusion Model for Face Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12626--12636},
}
Task Singular Vectors: Reducing Task Interference in Model Merging: Antonio Andrea Gargiulo,

Donato Crisostomi,

Maria Sofia Bucarelli,

Simone Scardapane,

Fabrizio Silvestri,

Emanuele Rodolà; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gargiulo_2025_CVPR,
  author    = {Gargiulo, Antonio Andrea and Crisostomi, Donato and Bucarelli, Maria Sofia and Scardapane, Simone and Silvestri, Fabrizio and Rodol{\`a}, Emanuele},
  title     = {Task Singular Vectors: Reducing Task Interference in Model Merging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18695--18705},
}
Self-Evolving Visual Concept Library using Vision-Language Critics: Atharva Sehgal,

Patrick Yuan,

Ziniu Hu,

Yisong Yue,

Jennifer J. Sun,

Swarat Chaudhuri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sehgal_2025_CVPR,
  author    = {Sehgal, Atharva and Yuan, Patrick and Hu, Ziniu and Yue, Yisong and Sun, Jennifer J. and Chaudhuri, Swarat},
  title     = {Self-Evolving Visual Concept Library using Vision-Language Critics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13124--13134},
}
Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport: Mengnan Liu,

Le Wang,

Sanping Zhou,

Kun Xia,

Xiaolong Sun,

Gang Hua; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Mengnan and Wang, Le and Zhou, Sanping and Xia, Kun and Sun, Xiaolong and Hua, Gang},
  title     = {Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13865--13875},
}
Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space: Yi Liu,

Wengen Li,

Jihong Guan,

Shuigeng Zhou,

Yichao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yi and Li, Wengen and Guan, Jihong and Zhou, Shuigeng and Zhang, Yichao},
  title     = {Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17851--17861},
}
OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit: Benquan Wang,

Ruyi An,

Jin-Kyu So,

Sergei Kurdiumov,

Eng Aik Chan,

Giorgio Adamo,

Yuhan Peng,

Yewen Li,

Bo An; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Benquan and An, Ruyi and So, Jin-Kyu and Kurdiumov, Sergei and Chan, Eng Aik and Adamo, Giorgio and Peng, Yuhan and Li, Yewen and An, Bo},
  title     = {{OpticalNet}: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10900--10912},
}
Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility: Yidi Li,

Jun Xiao,

Zhengda Lu,

Yiqun Wang,

Haiyong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yidi and Xiao, Jun and Lu, Zhengda and Wang, Yiqun and Jiang, Haiyong},
  title     = {Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18531--18540},
}
DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID: Xin Liang,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Xin and Rawat, Yogesh S},
  title     = {{DIFFER}: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person {Re-ID}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13980--13989},
}
HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset: Ron Ferens,

Yosi Keller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ferens_2025_CVPR,
  author    = {Ferens, Ron and Keller, Yosi},
  title     = {{HyperPose}: Hypernetwork-Infused Camera Pose Localization and an Extended {Cambridge Landmarks} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11547--11557},
}
Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking: Hongkai Wei,

Yang Yang,

Shijie Sun,

Mingtao Feng,

Xiangyu Song,

Qi Lei,

Hongli Hu,

Rong Wang,

Huansheng Song,

Naveed Akhtar,

Ajmal Saeed Mian; [pdf]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Hongkai and Yang, Yang and Sun, Shijie and Feng, Mingtao and Song, Xiangyu and Lei, Qi and Hu, Hongli and Wang, Rong and Song, Huansheng and Akhtar, Naveed and Mian, Ajmal Saeed},
  title     = {{Mono3DVLT}: Monocular-Video-Based {3D} Visual Language Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13886--13896},
}
Towards Universal Dataset Distillation via Task-Driven Diffusion: Ding Qi,

Jian Li,

Junyao Gao,

Shuguang Dou,

Ying Tai,

Jianlong Hu,

Bo Zhao,

Yabiao Wang,

Chengjie Wang,

Cairong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Ding and Li, Jian and Gao, Junyao and Dou, Shuguang and Tai, Ying and Hu, Jianlong and Zhao, Bo and Wang, Yabiao and Wang, Chengjie and Zhao, Cairong},
  title     = {Towards Universal Dataset Distillation via Task-Driven Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10557--10566},
}
Parametric Point Cloud Completion for Polygonal Surface Reconstruction: Zhaiyu Chen,

Yuqing Wang,

Liangliang Nan,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaiyu and Wang, Yuqing and Nan, Liangliang and Zhu, Xiao Xiang},
  title     = {Parametric Point Cloud Completion for Polygonal Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11749--11758},
}
SyncSDE: A Probabilistic Framework for Diffusion Synchronization: Hyunjun Lee,

Hyunsoo Lee,

Sookwan Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Hyunjun and Lee, Hyunsoo and Han, Sookwan},
  title     = {{SyncSDE}: A Probabilistic Framework for Diffusion Synchronization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17508--17517},
}
MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation: Mingcheng Li,

Xiaolu Hou,

Ziyang Liu,

Dingkang Yang,

Ziyun Qian,

Jiawei Chen,

Jinjie Wei,

Yue Jiang,

Qingyao Xu,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Mingcheng and Hou, Xiaolu and Liu, Ziyang and Yang, Dingkang and Qian, Ziyun and Chen, Jiawei and Wei, Jinjie and Jiang, Yue and Xu, Qingyao and Zhang, Lihua},
  title     = {{MCCD}: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13263--13272},
}
Dual Semantic Guidance for Open Vocabulary Semantic Segmentation: Zhengyang Wang,

Tingliang Feng,

Fan Lyu,

Fanhua Shang,

Wei Feng,

Liang Wan; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhengyang and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Feng, Wei and Wan, Liang},
  title     = {Dual Semantic Guidance for Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20212--20222},
}
Generalizable Object Keypoint Localization from Generative Priors: Dongkai Wang,

Jiang Duan,

Liangjian Wen,

Shiyu Xuan,

Hao Chen,

Shiliang Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Dongkai and Duan, Jiang and Wen, Liangjian and Xuan, Shiyu and Chen, Hao and Zhang, Shiliang},
  title     = {Generalizable Object Keypoint Localization from Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20265--20274},
}
FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning: Hao Zheng,

Zhigang Hu,

Liu Yang,

Meiguang Zheng,

Aikun Xu,

Boyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hao and Hu, Zhigang and Yang, Liu and Zheng, Meiguang and Xu, Aikun and Wang, Boyu},
  title     = {{FedCALM}: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15444--15453},
}
CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth: Zhiyu Qu,

Yunqi Miao,

Zhensong Zhang,

Jifei Song,

Jiankang Deng,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Zhiyu and Miao, Yunqi and Zhang, Zhensong and Song, Jifei and Deng, Jiankang and Song, Yi-Zhe},
  title     = {{CaricatureBooth}: Data-Free Interactive Caricature Generation in a Photo Booth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10815--10824},
}
FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting: Hengyu Liu,

Yuehao Wang,

Chenxin Li,

Ruisi Cai,

Kevin Wang,

Wuyang Li,

Pavlo Molchanov,

Peihao Wang,

Zhangyang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hengyu and Wang, Yuehao and Li, Chenxin and Cai, Ruisi and Wang, Kevin and Li, Wuyang and Molchanov, Pavlo and Wang, Peihao and Wang, Zhangyang},
  title     = {{FlexGS}: Train Once, Deploy Everywhere with Many-in-One Flexible {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16336--16345},
}
Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning: Zhiyuan Yan,

Yandan Zhao,

Shen Chen,

Mingyi Guo,

Xinghe Fu,

Taiping Yao,

Shouhong Ding,

Yunsheng Wu,

Li Yuan; [pdf]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zhiyuan and Zhao, Yandan and Chen, Shen and Guo, Mingyi and Fu, Xinghe and Yao, Taiping and Ding, Shouhong and Wu, Yunsheng and Yuan, Li},
  title     = {Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12615--12625},
}
T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation: Lijun Li,

Zhelun Shi,

Xuhao Hu,

Bowen Dong,

Yiran Qin,

Xihui Liu,

Lu Sheng,

Jing Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lijun and Shi, Zhelun and Hu, Xuhao and Dong, Bowen and Qin, Yiran and Liu, Xihui and Sheng, Lu and Shao, Jing},
  title     = {{T2ISafety}: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13381--13392},
}
Make It Count: Text-to-Image Generation with an Accurate Number of Objects: Lital Binyamin,

Yoad Tewel,

Hilit Segev,

Eran Hirsch,

Royi Rassin,

Gal Chechik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Binyamin_2025_CVPR,
  author    = {Binyamin, Lital and Tewel, Yoad and Segev, Hilit and Hirsch, Eran and Rassin, Royi and Chechik, Gal},
  title     = {Make It Count: Text-to-Image Generation with an Accurate Number of Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13242--13251},
}
TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception: Zhiying Song,

Lei Yang,

Fuxi Wen,

Jun Li; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Zhiying and Yang, Lei and Wen, Fuxi and Li, Jun},
  title     = {{TraF-Align}: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12048--12057},
}
DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching: Emanuele Aiello,

Umberto Michieli,

Diego Valsesia,

Mete Ozay,

Enrico Magli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aiello_2025_CVPR,
  author    = {Aiello, Emanuele and Michieli, Umberto and Valsesia, Diego and Ozay, Mete and Magli, Enrico},
  title     = {{DreamCache}: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12480--12489},
}
FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection: Zhonghang Liu,

Kun Zhou,

Changshuo Wang,

Wen-Yan Lin,

Jiangbo Lu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhonghang and Zhou, Kun and Wang, Changshuo and Lin, Wen-Yan and Lu, Jiangbo},
  title     = {{FlexUOD}: The Answer to Real-world Unsupervised Image Outlier Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15183--15193},
}
Focusing on Tracks for Online Multi-Object Tracking: Kyujin Shim,

Kangwook Ko,

Yujin Yang,

Changick Kim; [pdf] [supp]
[bibtex]
@InProceedings{Shim_2025_CVPR,
  author    = {Shim, Kyujin and Ko, Kangwook and Yang, Yujin and Kim, Changick},
  title     = {Focusing on Tracks for Online Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11687--11696},
}
Identity-preserving Distillation Sampling by Fixed-Point Iterator: SeonHwa Kim,

Jiwon Kim,

Soobin Park,

Donghoon Ahn,

Jiwon Kang,

Seungryong Kim,

Kyong Hwan Jin,

Eunju Cha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, SeonHwa and Kim, Jiwon and Park, Soobin and Ahn, Donghoon and Kang, Jiwon and Kim, Seungryong and Jin, Kyong Hwan and Cha, Eunju},
  title     = {Identity-preserving Distillation Sampling by Fixed-Point Iterator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11115--11124},
}
WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild: Rolandos Alexandros Potamias,

Jinglei Zhang,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Potamias_2025_CVPR,
  author    = {Potamias, Rolandos Alexandros and Zhang, Jinglei and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {{WiLoR}: End-to-end {3D} Hand Localization and Reconstruction in-the-wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12242--12254},
}
BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models: Taha Koleilat,

Hojat Asgariandehkordi,

Hassan Rivaz,

Yiming Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koleilat_2025_CVPR,
  author    = {Koleilat, Taha and Asgariandehkordi, Hojat and Rivaz, Hassan and Xiao, Yiming},
  title     = {{BiomedCoOp}: Learning to Prompt for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14766--14776},
}
MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation: Aviral Chharia,

Wenbo Gou,

Haoye Dong; [pdf] [supp]
[bibtex]
@InProceedings{Chharia_2025_CVPR,
  author    = {Chharia, Aviral and Gou, Wenbo and Dong, Haoye},
  title     = {{MV-SSM}: Multi-View State Space Modeling for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11590--11599},
}
AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities: Guillaume Astruc,

Nicolas Gonthier,

Clément Mallet,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astruc_2025_CVPR,
  author    = {Astruc, Guillaume and Gonthier, Nicolas and Mallet, Cl{\'e}ment and Landrieu, Loic},
  title     = {{AnySat}: One {Earth} Observation Model for Many Resolutions, Scales, and Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19530--19540},
}
OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?: Junbo Niu,

Yifei Li,

Ziyang Miao,

Chunjiang Ge,

Yuanhang Zhou,

Qihao He,

Xiaoyi Dong,

Haodong Duan,

Shuangrui Ding,

Rui Qian,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Conghui He,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Niu_2025_CVPR,
  author    = {Niu, Junbo and Li, Yifei and Miao, Ziyang and Ge, Chunjiang and Zhou, Yuanhang and He, Qihao and Dong, Xiaoyi and Duan, Haodong and Ding, Shuangrui and Qian, Rui and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi},
  title     = {{OVO-Bench}: How Far is Your {Video-LLMs} from Real-World Online Video Understanding?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18902--18913},
}
GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting: Zixuan Chen,

Guangcong Wang,

Jiahao Zhu,

Jianhuang Lai,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Wang, Guangcong and Zhu, Jiahao and Lai, Jianhuang and Xie, Xiaohua},
  title     = {{GuardSplat}: Efficient and Robust Watermarking for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16325--16335},
}
RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives: Chirag Parikh,

Deepti Rawat,

Rakshitha R. T.,

Tathagata Ghosh,

Ravi Kiran Sarvadevabhatla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parikh_2025_CVPR,
  author    = {Parikh, Chirag and Rawat, Deepti and T., Rakshitha R. and Ghosh, Tathagata and Sarvadevabhatla, Ravi Kiran},
  title     = {{RoadSocial}: A Diverse {VideoQA} Dataset and Benchmark for Road Event Understanding from Social Video Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19002--19011},
}
Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation: Yiping Wang,

Xuehai He,

Kuan Wang,

Luyao Ma,

Jianwei Yang,

Shuohang Wang,

Simon Shaolei Du,

Yelong Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yiping and He, Xuehai and Wang, Kuan and Ma, Luyao and Yang, Jianwei and Wang, Shuohang and Du, Simon Shaolei and Shen, Yelong},
  title     = {Is Your World Simulator a Good Story Presenter? {A} Consecutive Events-Based Benchmark for Future Long Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13629--13638},
}
LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene: Xiaoyu Zhang,

Weihong Pan,

Chong Bao,

Xiyu Zhang,

Xiaojun Xiang,

Hanqing Jiang,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xiaoyu and Pan, Weihong and Bao, Chong and Zhang, Xiyu and Xiang, Xiaojun and Jiang, Hanqing and Bao, Hujun},
  title     = {{LookCloser}: Frequency-aware Radiance Field for Tiny-Detail Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16122--16132},
}
Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World: Bangyan Liao,

Zhenjun Zhao,

Haoang Li,

Yi Zhou,

Yingping Zeng,

Hao Li,

Peidong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Bangyan and Zhao, Zhenjun and Li, Haoang and Zhou, Yi and Zeng, Yingping and Li, Hao and Liu, Peidong},
  title     = {Convex Relaxation for Robust Vanishing Point Estimation in {Manhattan} World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15823--15832},
}
FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting: Fangyu Wu,

Yuhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Fangyu and Chen, Yuhao},
  title     = {{FruitNinja}: {3D} Object Interior Texture Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11051--11060},
}
Take the Bull by the Horns: Learning to Segment Hard Samples: Yuan Guo,

Jingyu Kong,

Yu Wang,

Yuping Duan; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuan and Kong, Jingyu and Wang, Yu and Duan, Yuping},
  title     = {Take the Bull by the Horns: Learning to Segment Hard Samples},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15642--15652},
}
EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation: Diljeet Jagpal,

Xi Chen,

Vinay P. Namboodiri; [pdf] [supp]
[bibtex]
@InProceedings{Jagpal_2025_CVPR,
  author    = {Jagpal, Diljeet and Chen, Xi and Namboodiri, Vinay P.},
  title     = {{EIDT-V}: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18219--18228},
}
Reproducible Vision-Language Models Meet Concepts Out of Pre-Training: Ziliang Chen,

Xin Huang,

Xiaoxuan Fan,

Keze Wang,

Yuyu Zhou,

Quanlong Guan,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ziliang and Huang, Xin and Fan, Xiaoxuan and Wang, Keze and Zhou, Yuyu and Guan, Quanlong and Lin, Liang},
  title     = {Reproducible Vision-Language Models Meet Concepts Out of Pre-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14701--14711},
}
Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation: Guy Yariv,

Yuval Kirstain,

Amit Zohar,

Shelly Sheynin,

Yaniv Taigman,

Yossi Adi,

Sagie Benaim,

Adam Polyak; [pdf] [supp]
[bibtex]
@InProceedings{Yariv_2025_CVPR,
  author    = {Yariv, Guy and Kirstain, Yuval and Zohar, Amit and Sheynin, Shelly and Taigman, Yaniv and Adi, Yossi and Benaim, Sagie and Polyak, Adam},
  title     = {{Through-The-Mask}: Mask-based Motion Trajectories for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18198--18208},
}
MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model: Haoyuan Wang,

Zhenwei Wang,

Xiaoxiao Long,

Cheng Lin,

Gerhard Hancke,

Rynson W.H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoyuan and Wang, Zhenwei and Long, Xiaoxiao and Lin, Cheng and Hancke, Gerhard and Lau, Rynson W. H.},
  title     = {{MAGE}: Single Image to Material-Aware {3D} via the Multi-View {G-Buffer} Estimation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10985--10995},
}
MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image: Shaoming Li,

Qing Cai,

Songqi Kong,

Runqing Tan,

Heng Tong,

Shiji Qiu,

Yongguo Jiang,

Zhi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shaoming and Cai, Qing and Kong, Songqi and Tan, Runqing and Tong, Heng and Qiu, Shiji and Jiang, Yongguo and Liu, Zhi},
  title     = {{MESC-3D}: Mining Effective Semantic Cues for {3D} Reconstruction from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16912--16921},
}
Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging: Xianrui Li,

Yufei Cui,

Jun Li,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xianrui and Cui, Yufei and Li, Jun and Chan, Antoni B.},
  title     = {Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20800--20809},
}
Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks: Yu Zhou,

Dian Zheng,

Qijie Mo,

Renjie Lu,

Kun-Yu Lin,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yu and Zheng, Dian and Mo, Qijie and Lu, Renjie and Lin, Kun-Yu and Zheng, Wei-Shi},
  title     = {Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20350--20359},
}
TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions: Wang Yu-Hang,

Junkang Guo,

Aolei Liu,

Kaihao Wang,

Zaitong Wu,

Zhenyu Liu,

Wenfei Yin,

Jian Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yu-Hang_2025_CVPR,
  author    = {Yu-Hang, Wang and Guo, Junkang and Liu, Aolei and Wang, Kaihao and Wu, Zaitong and Liu, Zhenyu and Yin, Wenfei and Liu, Jian},
  title     = {{TAET}: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15476--15485},
}
Few-shot Personalized Scanpath Prediction: Ruoyu Xue,

Jingyi Xu,

Sounak Mondal,

Hieu Le,

Greg Zelinsky,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Ruoyu and Xu, Jingyi and Mondal, Sounak and Le, Hieu and Zelinsky, Greg and Hoai, Minh and Samaras, Dimitris},
  title     = {Few-shot Personalized Scanpath Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13497--13507},
}
Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models: Jiuming Liu,

Jinru Han,

Lihao Liu,

Angelica I. Aviles-Rivero,

Chaokang Jiang,

Zhe Liu,

Hesheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiuming and Han, Jinru and Liu, Lihao and Aviles-Rivero, Angelica I. and Jiang, Chaokang and Liu, Zhe and Wang, Hesheng},
  title     = {{Mamba4D}: Efficient {4D} Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17626--17636},
}
Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation: Xin Zhang,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xin and Tan, Robby T.},
  title     = {Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14527--14537},
}
Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes: Ting Yu,

Yi Lin,

Jun Yu,

Zhenyu Lou,

Qiongjie Cui; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Ting and Lin, Yi and Yu, Jun and Lou, Zhenyu and Cui, Qiongjie},
  title     = {Vision-Guided Action: Enhancing {3D} Human Motion Prediction with Gaze-informed Affordance in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12335--12346},
}
ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation: Ling-An Zeng,

Guohong Huang,

Yi-Lin Wei,

Shengbo Gu,

Yu-Ming Tang,

Jingke Meng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Ling-An and Huang, Guohong and Wei, Yi-Lin and Gu, Shengbo and Tang, Yu-Ming and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{ChainHOI}: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12358--12369},
}
CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss: Dileepa Pitawela,

Gustavo Carneiro,

Hsiang-Ting Chen; [pdf] [supp]
[bibtex]
@InProceedings{Pitawela_2025_CVPR,
  author    = {Pitawela, Dileepa and Carneiro, Gustavo and Chen, Hsiang-Ting},
  title     = {{CLOC}: Contrastive Learning for Ordinal Classification with Multi-Margin {N-pair} Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15538--15548},
}
ObjectMover: Generative Object Movement with Video Prior: Xin Yu,

Tianyu Wang,

Soo Ye Kim,

Paul Guerrero,

Xi Chen,

Qing Liu,

Zhe Lin,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Xin and Wang, Tianyu and Kim, Soo Ye and Guerrero, Paul and Chen, Xi and Liu, Qing and Lin, Zhe and Qi, Xiaojuan},
  title     = {{ObjectMover}: Generative Object Movement with Video Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17682--17691},
}
MLLM-as-a-Judge for Image Safety without Human Labeling: Zhenting Wang,

Shuming Hu,

Shiyu Zhao,

Xiaowen Lin,

Felix Juefei-Xu,

Zhuowei Li,

Ligong Han,

Harihar Subramanyam,

Li Chen,

Jianfa Chen,

Nan Jiang,

Lingjuan Lyu,

Shiqing Ma,

Dimitris N. Metaxas,

Ankit Jain; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenting and Hu, Shuming and Zhao, Shiyu and Lin, Xiaowen and Juefei-Xu, Felix and Li, Zhuowei and Han, Ligong and Subramanyam, Harihar and Chen, Li and Chen, Jianfa and Jiang, Nan and Lyu, Lingjuan and Ma, Shiqing and Metaxas, Dimitris N. and Jain, Ankit},
  title     = {{MLLM-as-a-Judge} for Image Safety without Human Labeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14657--14666},
}
Learning to Filter Outlier Edges in Global SfM: Nicole Damblon,

Marc Pollefeys,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Damblon_2025_CVPR,
  author    = {Damblon, Nicole and Pollefeys, Marc and Barath, Daniel},
  title     = {Learning to Filter Outlier Edges in Global {SfM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11558--11568},
}
Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection: Xinjie Cui,

Yuezun Li,

Ao Luo,

Jiaran Zhou,

Junyu Dong; [pdf]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Xinjie and Li, Yuezun and Luo, Ao and Zhou, Jiaran and Dong, Junyu},
  title     = {Forensics Adapter: Adapting {CLIP} for Generalizable Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19207--19217},
}
KAC: Kolmogorov-Arnold Classifier for Continual Learning: Yusong Hu,

Zichen Liang,

Fei Yang,

Qibin Hou,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Yusong and Liang, Zichen and Yang, Fei and Hou, Qibin and Liu, Xialei and Cheng, Ming-Ming},
  title     = {{KAC}: {Kolmogorov-Arnold} Classifier for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15297--15307},
}
BOOTPLACE: Bootstrapped Object Placement with Detection Transformers: Hang Zhou,

Xinxin Zuo,

Rui Ma,

Li Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Hang and Zuo, Xinxin and Ma, Rui and Cheng, Li},
  title     = {{BOOTPLACE}: Bootstrapped Object Placement with Detection Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19294--19303},
}
FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Objection Detection: Chenxu Dang,

ZaiPeng Duan,

Pei An,

Xinmin Zhang,

Xuzhong Hu,

Jie Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR,
  author    = {Dang, Chenxu and Duan, ZaiPeng and An, Pei and Zhang, Xinmin and Hu, Xuzhong and Ma, Jie},
  title     = {{FASTer}: Focal token Acquiring-and-Scaling Transformer for Long-term {3D} Objection Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17029--17038},
}
Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency: Hyunho Ha,

Lei Xiao,

Christian Richardt,

Thu Nguyen-Phuoc,

Changil Kim,

Min H. Kim,

Douglas Lanman,

Numair Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2025_CVPR,
  author    = {Ha, Hyunho and Xiao, Lei and Richardt, Christian and Nguyen-Phuoc, Thu and Kim, Changil and Kim, Min H. and Lanman, Douglas and Khan, Numair},
  title     = {Geometry-guided Online {3D} Video Synthesis with Multi-View Temporal Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11275--11285},
}
Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances: Yi Yu,

Botao Ren,

Peiyuan Zhang,

Mingxin Liu,

Junwei Luo,

Shaofeng Zhang,

Feipeng Da,

Junchi Yan,

Xue Yang; [pdf]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Yi and Ren, Botao and Zhang, Peiyuan and Liu, Mingxin and Luo, Junwei and Zhang, Shaofeng and Da, Feipeng and Yan, Junchi and Yang, Xue},
  title     = {{Point2RBox-v2}: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19283--19293},
}
CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images: Jungho Lee,

Suhwan Cho,

Taeoh Kim,

Ho-Deok Jang,

Minhyeok Lee,

Geonho Cha,

Dongyoon Wee,

Dogyoon Lee,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jungho and Cho, Suhwan and Kim, Taeoh and Jang, Ho-Deok and Lee, Minhyeok and Cha, Geonho and Wee, Dongyoon and Lee, Dogyoon and Lee, Sangyoun},
  title     = {{CoCoGaussian}: Leveraging Circle of Confusion for {Gaussian} Splatting from Defocused Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16101--16110},
}
Semantic and Sequential Alignment for Referring Video Object Segmentation: Feiyu Pan,

Hao Fang,

Fangkai Li,

Yanyu Xu,

Yawei Li,

Luca Benini,

Xiankai Lu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Feiyu and Fang, Hao and Li, Fangkai and Xu, Yanyu and Li, Yawei and Benini, Luca and Lu, Xiankai},
  title     = {Semantic and Sequential Alignment for Referring Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19067--19076},
}
Continual SFT Matches Multimodal RLHF with Negative Supervision: Ke Zhu,

Yu Wang,

Yanpeng Sun,

Qiang Chen,

Jiangjiang Liu,

Gang Zhang,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ke and Wang, Yu and Sun, Yanpeng and Chen, Qiang and Liu, Jiangjiang and Zhang, Gang and Wang, Jingdong},
  title     = {Continual {SFT} Matches Multimodal {RLHF} with Negative Supervision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14615--14624},
}
Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition: Anqi Zhu,

Jingmin Zhu,

James Bailey,

Mingming Gong,

Qiuhong Ke; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Anqi and Zhu, Jingmin and Bailey, James and Gong, Mingming and Ke, Qiuhong},
  title     = {Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13876--13885},
}
ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting: Chengyou Jia,

Changliang Xia,

Zhuohang Dang,

Weijia Wu,

Hangwei Qian,

Minnan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Chengyou and Xia, Changliang and Dang, Zhuohang and Wu, Weijia and Qian, Hangwei and Luo, Minnan},
  title     = {{ChatGen}: Automatic Text-to-Image Generation From {FreeStyle} Chatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13284--13293},
}
VEU-Bench: Towards Comprehensive Understanding of Video Editing: Bozheng Li,

Yongliang Wu,

Yi Lu,

Jiashuo Yu,

Licheng Tang,

Jiawang Cao,

Wenqing Zhu,

Yuyang Sun,

Jay Wu,

Wenbo Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Bozheng and Wu, Yongliang and Lu, Yi and Yu, Jiashuo and Tang, Licheng and Cao, Jiawang and Zhu, Wenqing and Sun, Yuyang and Wu, Jay and Zhu, Wenbo},
  title     = {{VEU-Bench}: Towards Comprehensive Understanding of Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13671--13680},
}
Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression: Jinchang Xu,

Shaokang Wang,

Jintao Chen,

Zhe Li,

Peidong Jia,

Fei Zhao,

Guoqing Xiang,

Zhijian Hao,

Shanghang Zhang,

Xiaodong Xie; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jinchang and Wang, Shaokang and Chen, Jintao and Li, Zhe and Jia, Peidong and Zhao, Fei and Xiang, Guoqing and Hao, Zhijian and Zhang, Shanghang and Xie, Xiaodong},
  title     = {Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18051--18061},
}
Yo'Chameleon: Personalized Vision and Language Generation: Thao Nguyen,

Krishna Kumar Singh,

Jing Shi,

Trung Bui,

Yong Jae Lee,

Yuheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Thao and Singh, Krishna Kumar and Shi, Jing and Bui, Trung and Lee, Yong Jae and Li, Yuheng},
  title     = {{Yo'Chameleon}: Personalized Vision and Language Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14438--14448},
}
PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution: Shian Du,

Menghan Xia,

Chang Liu,

Xintao Wang,

Jing Wang,

Pengfei Wan,

Di Zhang,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Shian and Xia, Menghan and Liu, Chang and Wang, Xintao and Wang, Jing and Wan, Pengfei and Zhang, Di and Ji, Xiangyang},
  title     = {{PatchVSR}: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17799--17809},
}
FluxSpace: Disentangled Semantic Editing in Rectified Flow Models: Yusuf Dalva,

Kavana Venkatesh,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Dalva_2025_CVPR,
  author    = {Dalva, Yusuf and Venkatesh, Kavana and Yanardag, Pinar},
  title     = {{FluxSpace}: Disentangled Semantic Editing in Rectified Flow Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13083--13092},
}
Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization: Zhipeng Xu,

De Cheng,

Xinyang Jiang,

Nannan Wang,

Dongsheng Li,

Xinbo Gao; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zhipeng and Cheng, De and Jiang, Xinyang and Wang, Nannan and Li, Dongsheng and Gao, Xinbo},
  title     = {Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18584--18595},
}
ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts: Dmitry Petrov,

Pradyumn Goyal,

Divyansh Shivashok,

Yuanming Tao,

Melinos Averkiou,

Evangelos Kalogerakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Petrov_2025_CVPR,
  author    = {Petrov, Dmitry and Goyal, Pradyumn and Shivashok, Divyansh and Tao, Yuanming and Averkiou, Melinos and Kalogerakis, Evangelos},
  title     = {{ShapeWords}: Guiding Text-to-Image Synthesis with {3D} Shape-Aware Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13305--13314},
}
Auto-Encoded Supervision for Perceptual Image Super-Resolution: MinKyu Lee,

Sangeek Hyun,

Woojin Jun,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, MinKyu and Hyun, Sangeek and Jun, Woojin and Heo, Jae-Pil},
  title     = {Auto-Encoded Supervision for Perceptual Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17958--17968},
}
Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation: Yuan Gan,

Jiaxu Miao,

Yunze Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_CVPR,
  author    = {Gan, Yuan and Miao, Jiaxu and Wang, Yunze and Yang, Yi},
  title     = {Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in {LDM-based} Talking-Head Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13434--13444},
}
Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing: Jiayi Fu,

Siyu Liu,

Zikun Liu,

Chun-Le Guo,

Hyunhee Park,

Ruiqi Wu,

Guoqing Wang,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiayi and Liu, Siyu and Liu, Zikun and Guo, Chun-Le and Park, Hyunhee and Wu, Ruiqi and Wang, Guoqing and Li, Chongyi},
  title     = {Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12700--12709},
}
Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning: Bardia Safaei,

Faizan Siddiqui,

Jiacong Xu,

Vishal M. Patel,

Shao-Yuan Lo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Safaei_2025_CVPR,
  author    = {Safaei, Bardia and Siddiqui, Faizan and Xu, Jiacong and Patel, Vishal M. and Lo, Shao-Yuan},
  title     = {Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14247--14256},
}
Gradient-Guided Annealing for Domain Generalization: Aristotelis Ballas,

Christos Diou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ballas_2025_CVPR,
  author    = {Ballas, Aristotelis and Diou, Christos},
  title     = {Gradient-Guided Annealing for Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20558--20568},
}
MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research: James Burgess,

Jeffrey J Nirschl,

Laura Bravo-Sánchez,

Alejandro Lozano,

Sanket Rajan Gupte,

Jesus G. Galaz-Montoya,

Yuhui Zhang,

Yuchang Su,

Disha Bhowmik,

Zachary Coman,

Sarina M Hasan,

Alexandra Johannesson,

William D. Leineweber,

Malvika G Nair,

Ridhi Yarlagadda,

Connor Zuraski,

Wah Chiu,

Sarah Cohen,

Jan N. Hansen,

Manuel D Leonetti,

Chad Liu,

Emma Lundberg,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Burgess_2025_CVPR,
  author    = {Burgess, James and Nirschl, Jeffrey J and Bravo-S{\'a}nchez, Laura and Lozano, Alejandro and Gupte, Sanket Rajan and Galaz-Montoya, Jesus G. and Zhang, Yuhui and Su, Yuchang and Bhowmik, Disha and Coman, Zachary and Hasan, Sarina M and Johannesson, Alexandra and Leineweber, William D. and Nair, Malvika G and Yarlagadda, Ridhi and Zuraski, Connor and Chiu, Wah and Cohen, Sarah and Hansen, Jan N. and Leonetti, Manuel D and Liu, Chad and Lundberg, Emma and Yeung-Levy, Serena},
  title     = {{MicroVQA}: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19552--19564},
}
Event-based Video Super-Resolution via State Space Models: Zeyu Xiao,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Zeyu and Wang, Xinchao},
  title     = {Event-based Video Super-Resolution via State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12564--12574},
}
Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding: Pedro Hermosilla,

Christian Stippel,

Leon Sick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hermosilla_2025_CVPR,
  author    = {Hermosilla, Pedro and Stippel, Christian and Sick, Leon},
  title     = {Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14835--14844},
}
VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding: Chaoyu Li,

Eun Woo Im,

Pooyan Fazli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chaoyu and Im, Eun Woo and Fazli, Pooyan},
  title     = {{VidHalluc}: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13723--13733},
}
CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction: Yuan Zhou,

Qingshan Xu,

Jiequan Cui,

Junbao Zhou,

Jing Zhang,

Richang Hong,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yuan and Xu, Qingshan and Cui, Jiequan and Zhou, Junbao and Zhang, Jing and Hong, Richang and Zhang, Hanwang},
  title     = {{CARE} Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20135--20145},
}
Paint by Inpaint: Learning to Add Image Objects by Removing Them First: Navve Wasserman,

Noam Rotstein,

Roy Ganz,

Ron Kimmel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasserman_2025_CVPR,
  author    = {Wasserman, Navve and Rotstein, Noam and Ganz, Roy and Kimmel, Ron},
  title     = {Paint by Inpaint: Learning to Add Image Objects by Removing Them First},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18313--18324},
}
PMA: Towards Parameter-Efficient Point Cloud Understanding via Point Mamba Adapter: Yaohua Zha,

Yanzi Wang,

Hang Guo,

Jinpeng Wang,

Tao Dai,

Bin Chen,

Zhihao Ouyang,

Xue Yuerong,

Ke Chen,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Yaohua and Wang, Yanzi and Guo, Hang and Wang, Jinpeng and Dai, Tao and Chen, Bin and Ouyang, Zhihao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao},
  title     = {{PMA}: Towards Parameter-Efficient Point Cloud Understanding via Point {Mamba} Adapter},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16976--16986},
}
LC-Mamba: Local and Continuous Mamba with Shifted Windows for Frame Interpolation: Min Wu Jeong,

Chae Eun Rhee; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Min Wu and Rhee, Chae Eun},
  title     = {{LC-Mamba}: Local and Continuous {Mamba} with Shifted Windows for Frame Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17671--17681},
}
Zero-Shot Head Swapping in Real-World Scenarios: Taewoong Kang,

Sohyun Jeong,

Hyojin Jang,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Taewoong and Jeong, Sohyun and Jang, Hyojin and Choo, Jaegul},
  title     = {Zero-Shot Head Swapping in Real-World Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10805--10814},
}
CAV-MAE Sync: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment: Edson Araujo,

Andrew Rouditchenko,

Yuan Gong,

Saurabhchand Bhati,

Samuel Thomas,

Brian Kingsbury,

Leonid Karlinsky,

Rogerio Feris,

James R. Glass,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Araujo_2025_CVPR,
  author    = {Araujo, Edson and Rouditchenko, Andrew and Gong, Yuan and Bhati, Saurabhchand and Thomas, Samuel and Kingsbury, Brian and Karlinsky, Leonid and Feris, Rogerio and Glass, James R. and Kuehne, Hilde},
  title     = {{CAV-MAE Sync}: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18794--18803},
}
COBRA: COmBinatorial Retrieval Augmentation for Few-Shot Adaptation: Arnav M. Das,

Gantavya Bhatt,

Lilly Kumari,

Sahil Verma,

Jeff Bilmes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2025_CVPR,
  author    = {Das, Arnav M. and Bhatt, Gantavya and Kumari, Lilly and Verma, Sahil and Bilmes, Jeff},
  title     = {{COBRA}: {COmBinatorial} Retrieval Augmentation for Few-Shot Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20534--20546},
}
ProbeSDF: Light Field Probes For Neural Surface Reconstruction: Briac Toussaint,

Diego Thomas,

Jean-Sébastien Franco; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Toussaint_2025_CVPR,
  author    = {Toussaint, Briac and Thomas, Diego and Franco, Jean-S{\'e}bastien},
  title     = {{ProbeSDF}: Light Field Probes For Neural Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11026--11035},
}
Hybrid Concept Bottleneck Models: Yang Liu,

Tianwei Zhang,

Shi Gu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yang and Zhang, Tianwei and Gu, Shi},
  title     = {Hybrid Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20179--20189},
}
Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning: Da-Wei Zhou,

Zi-Wen Cai,

Han-Jia Ye,

Lijun Zhang,

De-Chuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Da-Wei and Cai, Zi-Wen and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan},
  title     = {Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20547--20557},
}
RORem: Training a Robust Object Remover with Human-in-the-Loop: Ruibin Li,

Tao Yang,

Song Guo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruibin and Yang, Tao and Guo, Song and Zhang, Lei},
  title     = {{RORem}: Training a Robust Object Remover with Human-in-the-Loop},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14024--14035},
}
All Languages Matter: Evaluating LMMs on Culturally Diverse 100 Languages: Ashmal Vayani,

Dinura Dissanayake,

Hasindri Watawana,

Noor Ahsan,

Nevasini Sasikumar,

Omkar Thawakar,

Henok Biadglign Ademtew,

Yahya Hmaiti,

Amandeep Kumar,

Kartik Kukreja,

Mykola Maslych,

Wafa Al Ghallabi,

Mihail Minkov Mihaylov,

Chao Qin,

Abdelrahman M. Shaker,

Mike Zhang,

Mahardika Krisna Ihsani,

Amiel Gian Esplana,

Monil Gokani,

Shachar Mirkin,

Harsh Singh,

Ashay Srivastava,

Endre Hamerlik,

Fathinah Asma Izzati,

Fadillah Adamsyah Maani,

Sebastian Cavada,

Jenny Chim,

Rohit Gupta,

Sanjay Manjunath,

Kamila Zhumakhanova,

Feno Heriniaina Rabevohitra,

Azril Hafizi Amirudin,

Muhammad Ridzuan,

Daniya Najiha Abdul Kareem,

Ketan Pravin More,

Kunyang Li,

Pramesh Shakya,

Muhammad Saad,

Amirpouya Ghasemaghaei,

Amirbek Djanibekov,

Dilshod Azizov,

Branislava Jankovic,

Naman Bhatia,

Alvaro Cabrera,

Johan Obando-Ceron,

Olympiah Otieno,

Febian Farestam,

Muztoba Rabbani,

Sanoojan Ballah,

Santosh Sanjeev,

Abduragim Shtanchaev,

Maheen Fatima,

Thao Nguyen,

Amrin Kareem,

Toluwani Aremu,

Nathan Augusto Zacarias Xavier,

Amit Bhatkal,

Hawau Olamide Toyin,

Aman Chadha,

Hisham Cholakkal,

Rao Muhammad Anwer,

Michael Felsberg,

Jorma Laaksonen,

Thamar Solorio,

Monojit Choudhury,

Ivan Laptev,

Mubarak Shah,

Salman Khan,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vayani_2025_CVPR,
  author    = {Vayani, Ashmal and Dissanayake, Dinura and Watawana, Hasindri and Ahsan, Noor and Sasikumar, Nevasini and Thawakar, Omkar and Ademtew, Henok Biadglign and Hmaiti, Yahya and Kumar, Amandeep and Kukreja, Kartik and Maslych, Mykola and Al Ghallabi, Wafa and Mihaylov, Mihail Minkov and Qin, Chao and Shaker, Abdelrahman M. and Zhang, Mike and Ihsani, Mahardika Krisna and Esplana, Amiel Gian and Gokani, Monil and Mirkin, Shachar and Singh, Harsh and Srivastava, Ashay and Hamerlik, Endre and Izzati, Fathinah Asma and Maani, Fadillah Adamsyah and Cavada, Sebastian and Chim, Jenny and Gupta, Rohit and Manjunath, Sanjay and Zhumakhanova, Kamila and Rabevohitra, Feno Heriniaina and Amirudin, Azril Hafizi and Ridzuan, Muhammad and Kareem, Daniya Najiha Abdul and More, Ketan Pravin and Li, Kunyang and Shakya, Pramesh and Saad, Muhammad and Ghasemaghaei, Amirpouya and Djanibekov, Amirbek and Azizov, Dilshod and Jankovic, Branislava and Bhatia, Naman and Cabrera, Alvaro and Obando-Ceron, Johan and Otieno, Olympiah and Farestam, Febian and Rabbani, Muztoba and Ballah, Sanoojan and Sanjeev, Santosh and Shtanchaev, Abduragim and Fatima, Maheen and Nguyen, Thao and Kareem, Amrin and Aremu, Toluwani and Xavier, Nathan Augusto Zacarias and Bhatkal, Amit and Toyin, Hawau Olamide and Chadha, Aman and Cholakkal, Hisham and Anwer, Rao Muhammad and Felsberg, Michael and Laaksonen, Jorma and Solorio, Thamar and Choudhury, Monojit and Laptev, Ivan and Shah, Mubarak and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {All Languages Matter: Evaluating {LMMs} on Culturally Diverse 100 Languages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19565--19575},
}
Video-Bench: Human-Aligned Video Generation Benchmark: Hui Han,

Siyuan Li,

Jiaqi Chen,

Yiwen Yuan,

Yuling Wu,

Yufan Deng,

Chak Tou Leong,

Hanwen Du,

Junchen Fu,

Youhua Li,

Jie Zhang,

Chi Zhang,

Li-jia Li,

Yongxin Ni; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Hui and Li, Siyuan and Chen, Jiaqi and Yuan, Yiwen and Wu, Yuling and Deng, Yufan and Leong, Chak Tou and Du, Hanwen and Fu, Junchen and Li, Youhua and Zhang, Jie and Zhang, Chi and Li, Li-jia and Ni, Yongxin},
  title     = {{Video-Bench}: Human-Aligned Video Generation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18858--18868},
}
MergeVQ: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization: Siyuan Li,

Luyuan Zhang,

Zedong Wang,

Juanxi Tian,

Cheng Tan,

Zicheng Liu,

Chang Yu,

Qingsong Xie,

Haonan Lu,

Haoqian Wang,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Siyuan and Zhang, Luyuan and Wang, Zedong and Tian, Juanxi and Tan, Cheng and Liu, Zicheng and Yu, Chang and Xie, Qingsong and Lu, Haonan and Wang, Haoqian and Lei, Zhen},
  title     = {{MergeVQ}: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19713--19723},
}
Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models: Jiaming Zhang,

Junhong Ye,

Xingjun Ma,

Yige Li,

Yunfan Yang,

Yunhao Chen,

Jitao Sang,

Dit-Yan Yeung; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiaming and Ye, Junhong and Ma, Xingjun and Li, Yige and Yang, Yunfan and Chen, Yunhao and Sang, Jitao and Yeung, Dit-Yan},
  title     = {Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19900--19909},
}
Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video: Hoang Chuong Nguyen,

Wei Mao,

Jose M. Alvarez,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Hoang Chuong and Mao, Wei and Alvarez, Jose M. and Liu, Miaomiao},
  title     = {Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11472--11481},
}
IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing: Chun Gu,

Xiaofei Wei,

Zixuan Zeng,

Yuxuan Yao,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Chun and Wei, Xiaofei and Zeng, Zixuan and Yao, Yuxuan and Zhang, Li},
  title     = {{IRGS}: Inter-Reflective {Gaussian} Splatting with {2D} {Gaussian} Ray Tracing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10943--10952},
}
InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions: Sirui Xu,

Hung Yu Ling,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Ling, Hung Yu and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterMimic}: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12266--12277},
}
Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning: Kunyu Wang,

Xueyang Fu,

Xin Lu,

Chengjie Ge,

Chengzhi Cao,

Wei Zhai,

Zheng-Jun Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kunyu and Fu, Xueyang and Lu, Xin and Ge, Chengjie and Cao, Chengzhi and Zhai, Wei and Zha, Zheng-Jun},
  title     = {Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10577--10586},
}
A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning: Xin Wen,

Bingchen Zhao,

Yilun Chen,

Jiangmiao Pang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Xin and Zhao, Bingchen and Chen, Yilun and Pang, Jiangmiao and Qi, Xiaojuan},
  title     = {A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12143--12154},
}
Visual Agentic AI for Spatial Reasoning with a Dynamic API: Damiano Marsili,

Rohun Agrawal,

Yisong Yue,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marsili_2025_CVPR,
  author    = {Marsili, Damiano and Agrawal, Rohun and Yue, Yisong and Gkioxari, Georgia},
  title     = {Visual Agentic {AI} for Spatial Reasoning with a Dynamic {API}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19446--19455},
}
Feature Spectrum Learning for Remote Sensing Change Detection: Qi Zang,

Dong Zhao,

Shuang Wang,

Dou Quan,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2025_CVPR,
  author    = {Zang, Qi and Zhao, Dong and Wang, Shuang and Quan, Dou and Zhong, Zhun},
  title     = {Feature Spectrum Learning for Remote Sensing Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12647--12657},
}
DriveDreamer4D: World Models Are Effective Data Machines for 4D Driving Scene Representation: Guosheng Zhao,

Chaojun Ni,

Xiaofeng Wang,

Zheng Zhu,

Xueyang Zhang,

Yida Wang,

Guan Huang,

Xinze Chen,

Boyuan Wang,

Youyi Zhang,

Wenjun Mei,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guosheng and Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Zhang, Xueyang and Wang, Yida and Huang, Guan and Chen, Xinze and Wang, Boyuan and Zhang, Youyi and Mei, Wenjun and Wang, Xingang},
  title     = {{DriveDreamer4D}: World Models Are Effective Data Machines for {4D} Driving Scene Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12015--12026},
}
LoKi: Low-dimensional KAN for Efficient Fine-tuning Image Models: Xuan Cai,

Renjie Pan,

Hua Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Xuan and Pan, Renjie and Yang, Hua},
  title     = {{LoKi}: Low-dimensional {KAN} for Efficient Fine-tuning Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14869--14880},
}
Dr. Splat: Directly Referring 3D Gaussian Splatting via Direct Language Embedding Registration: Kim Jun-Seong,

GeonU Kim,

Kim Yu-Ji,

Yu-Chiang Frank Wang,

Jaesung Choe,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jun-Seong_2025_CVPR,
  author    = {Kim, Jun-Seong and Kim, GeonU and Kim, Yu-Ji and Wang, Yu-Chiang Frank and Choe, Jaesung and Oh, Tae-Hyun},
  title     = {{Dr. Splat}: Directly Referring {3D} {Gaussian} Splatting via Direct Language Embedding Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14137--14146},
}
Consistent Normal Orientation for 3D Point Clouds via Least Squares on Delaunay Graph: Rao Fu,

Jianmin Zheng,

Liang Yu; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Rao and Zheng, Jianmin and Yu, Liang},
  title     = {Consistent Normal Orientation for {3D} Point Clouds via Least Squares on {Delaunay} Graph},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16932--16942},
}
ATA: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting: Yizhe Tang,

Zhimin Sun,

Yuzhen Du,

Ran Yi,

Guangben Lu,

Teng Hu,

Luying Li,

Lizhuang Ma,

Fangyuan Zou; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yizhe and Sun, Zhimin and Du, Yuzhen and Yi, Ran and Lu, Guangben and Hu, Teng and Li, Luying and Ma, Lizhuang and Zou, Fangyuan},
  title     = {{ATA}: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18335--18345},
}
Optimizing for the Shortest Path in Denoising Diffusion Model: Ping Chen,

Xingpeng Zhang,

Zhaoxiang Liu,

Huan Hu,

Xiang Liu,

Kai Wang,

Min Wang,

Yanlin Qian,

Shiguo Lian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ping and Zhang, Xingpeng and Liu, Zhaoxiang and Hu, Huan and Liu, Xiang and Wang, Kai and Wang, Min and Qian, Yanlin and Lian, Shiguo},
  title     = {Optimizing for the Shortest Path in Denoising Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18021--18030},
}
Antidote: A Unified Framework for Mitigating LVLM Hallucinations in Counterfactual Presupposition and Object Perception: Yuanchen Wu,

Lu Zhang,

Hang Yao,

Junlong Du,

Ke Yan,

Shouhong Ding,

Yunsheng Wu,

Xiaoqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yuanchen and Zhang, Lu and Yao, Hang and Du, Junlong and Yan, Ke and Ding, Shouhong and Wu, Yunsheng and Li, Xiaoqiang},
  title     = {Antidote: A Unified Framework for Mitigating {LVLM} Hallucinations in Counterfactual Presupposition and Object Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14646--14656},
}
Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration: Jae Hyeon Park,

Joo Hyeon Jeon,

Jae Yun Lee,

Sangyeon Ahn,

Min Hee Cha,

Min Geol Kim,

Hyeok Nam,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jae Hyeon and Jeon, Joo Hyeon and Lee, Jae Yun and Ahn, Sangyeon and Cha, Min Hee and Kim, Min Geol and Nam, Hyeok and Cho, Sung In},
  title     = {Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20602--20611},
}
VODiff: Controlling Object Visibility Order in Text-to-Image Generation: Dong Liang,

Jinyuan Jia,

Yuhao Liu,

Zhanghan Ke,

Hongbo Fu,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Dong and Jia, Jinyuan and Liu, Yuhao and Ke, Zhanghan and Fu, Hongbo and Lau, Rynson W. H.},
  title     = {{VODiff}: Controlling Object Visibility Order in Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18379--18389},
}
CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation: Jiahao Li,

Weijian Ma,

Xueyang Li,

Yunzhong Lou,

Guichun Zhou,

Xiangdong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahao and Ma, Weijian and Li, Xueyang and Lou, Yunzhong and Zhou, Guichun and Zhou, Xiangdong},
  title     = {{CAD-Llama}: Leveraging Large Language Models for Computer-Aided Design Parametric {3D} Model Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18563--18573},
}
Leveraging SD Map to Augment HD Map-based Trajectory Prediction: Zhiwei Dong,

Ran Ding,

Wei Li,

Peng Zhang,

Guobin Tang,

Jia Guo; [pdf]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zhiwei and Ding, Ran and Li, Wei and Zhang, Peng and Tang, Guobin and Guo, Jia},
  title     = {Leveraging {SD} Map to Augment {HD} Map-based Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17219--17228},
}
ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation: Tao Tan,

Qiulei Dong; [pdf]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Tao and Dong, Qiulei},
  title     = {{ONDA-Pose}: Occlusion-Aware Neural Domain Adaptation for Self-Supervised {6D} Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16829--16838},
}
Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression: Jie Liu,

Tiexin Qin,

Hui Liu,

Yilei Shi,

Lichao Mou,

Xiao Xiang Zhu,

Shiqi Wang,

Haoliang Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jie and Qin, Tiexin and Liu, Hui and Shi, Yilei and Mou, Lichao and Zhu, Xiao Xiang and Wang, Shiqi and Li, Haoliang},
  title     = {{Q-PART}: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15560--15569},
}
Composing Parts for Expressive Object Generation: Harsh Rangwani,

Aishwarya Agarwal,

Kuldeep Kulkarni,

R. Venkatesh Babu,

Srikrishna Karanam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rangwani_2025_CVPR,
  author    = {Rangwani, Harsh and Agarwal, Aishwarya and Kulkarni, Kuldeep and Babu, R. Venkatesh and Karanam, Srikrishna},
  title     = {Composing Parts for Expressive Object Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13209--13219},
}
CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving: Dongkun Zhang,

Jiaming Liang,

Ke Guo,

Sha Lu,

Qi Wang,

Rong Xiong,

Zhenwei Miao,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Dongkun and Liang, Jiaming and Guo, Ke and Lu, Sha and Wang, Qi and Xiong, Rong and Miao, Zhenwei and Wang, Yue},
  title     = {{CarPlanner}: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17239--17248},
}
Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation: Yiming Qin,

Zhu Xu,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Yiming and Xu, Zhu and Liu, Yang},
  title     = {Apply Hierarchical-Chain-of-Generation to Complex Attributes {Text-to-3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18521--18530},
}
SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving: Xuesong Chen,

Linjiang Huang,

Tao Ma,

Rongyao Fang,

Shaoshuai Shi,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xuesong and Huang, Linjiang and Ma, Tao and Fang, Rongyao and Shi, Shaoshuai and Li, Hongsheng},
  title     = {{SOLVE}: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12068--12077},
}
Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection: Ji Du,

Fangwei Hao,

Mingyang Yu,

Desheng Kong,

Jiesheng Wu,

Bin Wang,

Jing Xu,

Ping Li; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Ji and Hao, Fangwei and Yu, Mingyang and Kong, Desheng and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19271--19282},
}
DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion: Wei Wu,

Xi Guo,

Weixuan Tang,

Tingxuan Huang,

Chiyu Wang,

Chenjing Ding; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wei and Guo, Xi and Tang, Weixuan and Huang, Tingxuan and Wang, Chiyu and Ding, Chenjing},
  title     = {{DriveScape}: High-Resolution Driving Video Generation by Multi-View Feature Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17187--17196},
}
Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights: Ondrej Tybl,

Lukas Neumann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tybl_2025_CVPR,
  author    = {Tybl, Ondrej and Neumann, Lukas},
  title     = {Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14881--14890},
}
Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond: Guanyao Wu,

Haoyu Liu,

Hongming Fu,

Yichuan Peng,

Jinyuan Liu,

Xin Fan,

Risheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Guanyao and Liu, Haoyu and Fu, Hongming and Peng, Yichuan and Liu, Jinyuan and Fan, Xin and Liu, Risheng},
  title     = {Every {SAM} Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17882--17891},
}
RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection: Fan Xing,

Zhuo Tian,

Xuefeng Fan,

Xiaoyi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Fan and Tian, Zhuo and Fan, Xuefeng and Zhou, Xiaoyi},
  title     = {{RAEncoder}: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20665--20674},
}
LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion.: Muchen Li,

Sammy Christen,

Chengde Wan,

Yujun Cai,

Renjie Liao,

Leonid Sigal,

Shugao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Muchen and Christen, Sammy and Wan, Chengde and Cai, Yujun and Liao, Renjie and Sigal, Leonid and Ma, Shugao},
  title     = {{LatentHOI}: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17416--17425},
}
Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs: Lucas Ventura,

Antoine Yang,

Cordelia Schmid,

Gül Varol; [pdf] [supp]
[bibtex]
@InProceedings{Ventura_2025_CVPR,
  author    = {Ventura, Lucas and Yang, Antoine and Schmid, Cordelia and Varol, G{\"u}l},
  title     = {{Chapter-Llama}: Efficient Chaptering in Hour-Long Videos with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18947--18958},
}
Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection: Fuyun Wang,

Tong Zhang,

Yuanzhi Wang,

Yide Qiu,

Xin Liu,

Xu Guo,

Zhen Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Fuyun and Zhang, Tong and Wang, Yuanzhi and Qiu, Yide and Liu, Xin and Guo, Xu and Cui, Zhen},
  title     = {Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20416--20426},
}
Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers: Ji Zhao,

Banglei Guan,

Zibin Liu,

Laurent Kneip; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ji and Guan, Banglei and Liu, Zibin and Kneip, Laurent},
  title     = {{Full-DoF} Egomotion Estimation for Event Cameras Using Geometric Solvers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11515--11524},
}
Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution: Zhiyuan You,

Xin Cai,

Jinjin Gu,

Tianfan Xue,

Chao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Zhiyuan and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao},
  title     = {Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14483--14494},
}
Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation: Jialai Wang,

Yuxiao Wu,

Weiye Xu,

Yating Huang,

Chao Zhang,

Zongpeng Li,

Mingwei Xu,

Zhenkai Liang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jialai and Wu, Yuxiao and Xu, Weiye and Huang, Yating and Zhang, Chao and Li, Zongpeng and Xu, Mingwei and Liang, Zhenkai},
  title     = {Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20103--20112},
}
Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding: Zining Wang,

Tongkun Guan,

Pei Fu,

Chen Duan,

Qianyi Jiang,

Zhentao Guo,

Shan Guo,

Junfeng Luo,

Wei Shen,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zining and Guan, Tongkun and Fu, Pei and Duan, Chen and Jiang, Qianyi and Guo, Zhentao and Guo, Shan and Luo, Junfeng and Shen, Wei and Yang, Xiaokang},
  title     = {Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14460--14471},
}
Mamba-Reg: Vision Mamba Also Needs Registers: Feng Wang,

Jiahao Wang,

Sucheng Ren,

Guoyizhe Wei,

Jieru Mei,

Wei Shao,

Yuyin Zhou,

Alan Yuille,

Cihang Xie; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feng and Wang, Jiahao and Ren, Sucheng and Wei, Guoyizhe and Mei, Jieru and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang},
  title     = {{Mamba-Reg}: Vision {Mamba} Also Needs Registers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14944--14953},
}
Visual Persona: Foundation Model for Full-Body Human Customization: Jisu Nam,

Soowon Son,

Zhan Xu,

Jing Shi,

Difan Liu,

Feng Liu,

Seungryong Kim,

Yang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
  author    = {Nam, Jisu and Son, Soowon and Xu, Zhan and Shi, Jing and Liu, Difan and Liu, Feng and Kim, Seungryong and Zhou, Yang},
  title     = {Visual Persona: Foundation Model for Full-Body Human Customization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18630--18641},
}
SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting: Jiahui Zhang,

Fangneng Zhan,

Ling Shao,

Shijian Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiahui and Zhan, Fangneng and Shao, Ling and Lu, Shijian},
  title     = {{SOGS}: Second-Order Anchor for Advanced {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11167--11176},
}
MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification: Jianwei Zhao,

Xin Li,

Fan Yang,

Qiang Zhai,

Ao Luo,

Yang Zhao,

Hong Cheng,

Huazhu Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jianwei and Li, Xin and Yang, Fan and Zhai, Qiang and Luo, Ao and Zhao, Yang and Cheng, Hong and Fu, Huazhu},
  title     = {{MExD}: An Expert-Infused Diffusion Model for Whole-Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20789--20799},
}
Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples: Weiwei Li,

Junzhuo Liu,

Yuanyuan Ren,

Yuchen Zheng,

Yahao Liu,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weiwei and Liu, Junzhuo and Ren, Yuanyuan and Zheng, Yuchen and Liu, Yahao and Li, Wen},
  title     = {Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15486--15496},
}
MoDec-GS: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic 3D Gaussian Splatting: Sangwoon Kwak,

Joonsoo Kim,

Jun Young Jeong,

Won-Sik Cheong,

Jihyong Oh,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kwak_2025_CVPR,
  author    = {Kwak, Sangwoon and Kim, Joonsoo and Jeong, Jun Young and Cheong, Won-Sik and Oh, Jihyong and Kim, Munchurl},
  title     = {{MoDec-GS}: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11338--11348},
}
DaCapo: Score Distillation as Stacked Bridge for Fast and High-quality 3D Editing: Yufei Huang,

Bangyan Liao,

Yuqi Hu,

Haitao Lin,

Lirong Wu,

Siyuan Li,

Cheng Tan,

Zicheng Liu,

Yunfan Liu,

Zelin Zang,

Chang Yu,

Zhen Lei; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yufei and Liao, Bangyan and Hu, Yuqi and Lin, Haitao and Wu, Lirong and Li, Siyuan and Tan, Cheng and Liu, Zicheng and Liu, Yunfan and Zang, Zelin and Yu, Chang and Lei, Zhen},
  title     = {{DaCapo}: Score Distillation as Stacked Bridge for Fast and High-quality {3D} Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16304--16313},
}
Number it: Temporal Grounding Videos like Flipping Manga: Yongliang Wu,

Xinting Hu,

Yuyang Sun,

Yizhou Zhou,

Wenbo Zhu,

Fengyun Rao,

Bernt Schiele,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yongliang and Hu, Xinting and Sun, Yuyang and Zhou, Yizhou and Zhu, Wenbo and Rao, Fengyun and Schiele, Bernt and Yang, Xu},
  title     = {Number it: Temporal Grounding Videos like Flipping Manga},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13754--13765},
}
SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction: Enrico Pallotta,

Sina Mokhtarzadeh Azar,

Shuai Li,

Olga Zatsarynna,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pallotta_2025_CVPR,
  author    = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Li, Shuai and Zatsarynna, Olga and Gall, Juergen},
  title     = {{SyncVP}: Joint Diffusion for Synchronous Multi-Modal Video Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13787--13797},
}
HUSH: Holistic Panoramic 3D Scene Understanding using Spherical Harmonics: Jongsung Lee,

Harin Park,

Byeong-Uk Lee,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jongsung and Park, Harin and Lee, Byeong-Uk and Joo, Kyungdon},
  title     = {{HUSH}: Holistic Panoramic {3D} Scene Understanding using Spherical Harmonics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16599--16608},
}
SkillMimic: Learning Basketball Interaction Skills from Demonstrations: Yinhuai Wang,

Qihan Zhao,

Runyi Yu,

Hok Wai Tsui,

Ailing Zeng,

Jing Lin,

Zhengyi Luo,

Jiwen Yu,

Xiu Li,

Qifeng Chen,

Jian Zhang,

Lei Zhang,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yinhuai and Zhao, Qihan and Yu, Runyi and Tsui, Hok Wai and Zeng, Ailing and Lin, Jing and Luo, Zhengyi and Yu, Jiwen and Li, Xiu and Chen, Qifeng and Zhang, Jian and Zhang, Lei and Tan, Ping},
  title     = {{SkillMimic}: Learning Basketball Interaction Skills from Demonstrations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17540--17549},
}
RGBAvatar: Reduced Gaussian Blendshapes for Online Modeling of Head Avatars: Linzhou Li,

Yumeng Li,

Yanlin Weng,

Youyi Zheng,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Linzhou and Li, Yumeng and Weng, Yanlin and Zheng, Youyi and Zhou, Kun},
  title     = {{RGBAvatar}: Reduced {Gaussian} Blendshapes for Online Modeling of Head Avatars},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10747--10757},
}
EEE-Bench: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark: Ming Li,

Jike Zhong,

Tianle Chen,

Yuxiang Lai,

Konstantinos Psounis; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ming and Zhong, Jike and Chen, Tianle and Lai, Yuxiang and Psounis, Konstantinos},
  title     = {{EEE-Bench}: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13337--13349},
}
A Unified Framework for Heterogeneous Semi-supervised Learning: Marzi Heidari,

Abdullah Alchihabi,

Hao Yan,

Yuhong Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heidari_2025_CVPR,
  author    = {Heidari, Marzi and Alchihabi, Abdullah and Yan, Hao and Guo, Yuhong},
  title     = {A Unified Framework for Heterogeneous Semi-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15371--15380},
}
Free360: Layered Gaussian Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views: Chong Bao,

Xiyu Zhang,

Zehao Yu,

Jiale Shi,

Guofeng Zhang,

Songyou Peng,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Chong and Zhang, Xiyu and Yu, Zehao and Shi, Jiale and Zhang, Guofeng and Peng, Songyou and Cui, Zhaopeng},
  title     = {Free360: Layered {Gaussian} Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16377--16387},
}
Open Ad-hoc Categorization with Contextualized Feature Learning: Zilin Wang,

Sangwoo Mo,

Stella X. Yu,

Sima Behpour,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zilin and Mo, Sangwoo and Yu, Stella X. and Behpour, Sima and Ren, Liu},
  title     = {Open Ad-hoc Categorization with Contextualized Feature Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15108--15117},
}
Dynamic Updates for Language Adaptation in Visual-Language Tracking: Xiaohai Li,

Bineng Zhong,

Qihua Liang,

Zhiyi Mo,

Jian Nong,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaohai and Zhong, Bineng and Liang, Qihua and Mo, Zhiyi and Nong, Jian and Song, Shuxiang},
  title     = {Dynamic Updates for Language Adaptation in Visual-Language Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19165--19174},
}
Multi-focal Conditioned Latent Diffusion for Person Image Synthesis: Jiaqi Liu,

Jichao Zhang,

Paolo Rota,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiaqi and Zhang, Jichao and Rota, Paolo and Sebe, Nicu},
  title     = {Multi-focal Conditioned Latent Diffusion for Person Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16019--16028},
}
Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor 3D Object Detection: Jiangyi Wang,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jiangyi and Zhao, Na},
  title     = {Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20329--20339},
}
Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification: Zhiqi Pang,

Junjie Wang,

Lingling Zhao,

Chunyu Wang; [pdf]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Zhiqi and Wang, Junjie and Zhao, Lingling and Wang, Chunyu},
  title     = {Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19251--19260},
}
OccMamba: Semantic Occupancy Prediction with State Space Models: Heng Li,

Yuenan Hou,

Xiaohan Xing,

Yuexin Ma,

Xiao Sun,

Yanyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Heng and Hou, Yuenan and Xing, Xiaohan and Ma, Yuexin and Sun, Xiao and Zhang, Yanyong},
  title     = {{OccMamba}: Semantic Occupancy Prediction with State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11949--11959},
}
Cheb-GR: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification: Jinxi Yang,

He Li,

Bo Du,

Mang Ye; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jinxi and Li, He and Du, Bo and Ye, Mang},
  title     = {{Cheb-GR}: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19261--19270},
}
Spotting the Unexpected (STU): A 3D LiDAR Dataset for Anomaly Segmentation in Autonomous Driving: Alexey Nekrasov,

Malcolm Burdorf,

Stewart Worrall,

Bastian Leibe,

Julie Stephany Berrio Perez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nekrasov_2025_CVPR,
  author    = {Nekrasov, Alexey and Burdorf, Malcolm and Worrall, Stewart and Leibe, Bastian and Perez, Julie Stephany Berrio},
  title     = {Spotting the Unexpected ({STU}): A {3D} {LiDAR} Dataset for Anomaly Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11875--11885},
}
Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning: Ji Hyeok Jung,

Eun Tae Kim,

Seoyeon Kim,

Joo Ho Lee,

Bumsoo Kim,

Buru Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Ji Hyeok and Kim, Eun Tae and Kim, Seoyeon and Lee, Joo Ho and Kim, Bumsoo and Chang, Buru},
  title     = {Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14257--14267},
}
GCC: Generative Color Constancy via Diffusing a Color Checker: Chen-Wei Chang,

Cheng-De Fan,

Chia-Che Chang,

Yi-Chen Lo,

Yu-Chee Tseng,

Jiun-Long Huang,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Chen-Wei and Fan, Cheng-De and Chang, Chia-Che and Lo, Yi-Chen and Tseng, Yu-Chee and Huang, Jiun-Long and Liu, Yu-Lun},
  title     = {{GCC}: Generative Color Constancy via Diffusing a Color Checker},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10868--10878},
}
On Denoising Walking Videos for Gait Recognition: Dongyang Jin,

Chao Fan,

Jingzhe Ma,

Jingkai Zhou,

Weihua Chen,

Shiqi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Dongyang and Fan, Chao and Ma, Jingzhe and Zhou, Jingkai and Chen, Weihua and Yu, Shiqi},
  title     = {On Denoising Walking Videos for Gait Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12347--12357},
}
Conformal Prediction for Zero-Shot Models: Julio Silva-Rodríguez,

Ismail Ben Ayed,

Jose Dolz; [pdf] [supp]
[bibtex]
@InProceedings{Silva-Rodriguez_2025_CVPR,
  author    = {Silva-Rodr{\'\i}guez, Julio and Ben Ayed, Ismail and Dolz, Jose},
  title     = {Conformal Prediction for Zero-Shot Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19931--19941},
}
PhysAnimator: Physics-Guided Generative Cartoon Animation: Tianyi Xie,

Yiwei Zhao,

Ying Jiang,

Chenfanfu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Tianyi and Zhao, Yiwei and Jiang, Ying and Jiang, Chenfanfu},
  title     = {{PhysAnimator}: Physics-Guided Generative Cartoon Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10793--10804},
}
FIMA-Q: Post-Training Quantization for Vision Transformers by Fisher Information Matrix Approximation: Zhuguanyu Wu,

Shihe Wang,

Jiayi Zhang,

Jiaxin Chen,

Yunhong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhuguanyu and Wang, Shihe and Zhang, Jiayi and Chen, Jiaxin and Wang, Yunhong},
  title     = {{FIMA-Q}: Post-Training Quantization for Vision Transformers by {Fisher} Information Matrix Approximation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14891--14900},
}
BACON: Improving Clarity of Image Captions via Bag-of-Concept Graphs: Zhantao Yang,

Ruili Feng,

Keyu Yan,

Huangji Wang,

Zhicai Wang,

Shangwen Zhu,

Han Zhang,

Jie Xiao,

Pingyu Wu,

Kai Zhu,

Jixuan Chen,

Chen-Wei Xie,

Yue Yang,

Hongyang Zhang,

Yu Liu,

Fan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhantao and Feng, Ruili and Yan, Keyu and Wang, Huangji and Wang, Zhicai and Zhu, Shangwen and Zhang, Han and Xiao, Jie and Wu, Pingyu and Zhu, Kai and Chen, Jixuan and Xie, Chen-Wei and Yang, Yue and Zhang, Hongyang and Liu, Yu and Cheng, Fan},
  title     = {{BACON}: Improving Clarity of Image Captions via Bag-of-Concept Graphs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14380--14389},
}
VasTSD: Learning 3D Vascular Tree-state Space Diffusion Model for Angiography Synthesis: Zhifeng Wang,

Renjiao Yi,

Xin Wen,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhifeng and Yi, Renjiao and Wen, Xin and Zhu, Chenyang and Xu, Kai},
  title     = {{VasTSD}: Learning {3D} Vascular Tree-state Space Diffusion Model for Angiography Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15693--15702},
}
PanSplat: 4K Panorama Synthesis with Feed-Forward Gaussian Splatting: Cheng Zhang,

Haofei Xu,

Qianyi Wu,

Camilo Cruz Gambardella,

Dinh Phung,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Cheng and Xu, Haofei and Wu, Qianyi and Gambardella, Camilo Cruz and Phung, Dinh and Cai, Jianfei},
  title     = {{PanSplat}: {4K} Panorama Synthesis with Feed-Forward {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11437--11447},
}
WISNet: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images: Shifan Zhang,

Hongzi Zhu,

Yinan He,

Minyi Guo,

Ziyang Lou,

Shan Chang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shifan and Zhu, Hongzi and He, Yinan and Guo, Minyi and Lou, Ziyang and Chang, Shan},
  title     = {{WISNet}: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15076--15085},
}
MixerMDM: Learnable Composition of Human Motion Diffusion Models: Pablo Ruiz-Ponce,

German Barquero,

Cristina Palmero,

Sergio Escalera,

José García-Rodríguez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruiz-Ponce_2025_CVPR,
  author    = {Ruiz-Ponce, Pablo and Barquero, German and Palmero, Cristina and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e}},
  title     = {{MixerMDM}: Learnable Composition of Human Motion Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12380--12390},
}
Hand-held Object Reconstruction from RGB Video with Dynamic Interaction: Shijian Jiang,

Qi Ye,

Rengan Xie,

Yuchi Huo,

Jiming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Shijian and Ye, Qi and Xie, Rengan and Huo, Yuchi and Chen, Jiming},
  title     = {Hand-held Object Reconstruction from {RGB} Video with Dynamic Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12220--12230},
}
AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers: Jiazhi Guan,

Kaisiyuan Wang,

Zhiliang Xu,

Quanwei Yang,

Yasheng Sun,

Shengyi He,

Borong Liang,

Yukang Cao,

Yingying Li,

Haocheng Feng,

Errui Ding,

Jingdong Wang,

Youjian Zhao,

Hang Zhou,

Ziwei Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Guan_2025_CVPR,
  author    = {Guan, Jiazhi and Wang, Kaisiyuan and Xu, Zhiliang and Yang, Quanwei and Sun, Yasheng and He, Shengyi and Liang, Borong and Cao, Yukang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wang, Jingdong and Zhao, Youjian and Zhou, Hang and Liu, Ziwei},
  title     = {{AudCast}: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10678--10689},
}
Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces: Jihan Yang,

Shusheng Yang,

Anjali W. Gupta,

Rilyn Han,

Li Fei-Fei,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jihan and Yang, Shusheng and Gupta, Anjali W. and Han, Rilyn and Fei-Fei, Li and Xie, Saining},
  title     = {Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10632--10643},
}
A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs: Wangbo Zhao,

Yizeng Han,

Jiasheng Tang,

Zhikai Li,

Yibing Song,

Kai Wang,

Zhangyang Wang,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Wangbo and Han, Yizeng and Tang, Jiasheng and Li, Zhikai and Song, Yibing and Wang, Kai and Wang, Zhangyang and You, Yang},
  title     = {A Stitch in Time Saves Nine: Small {VLM} is a Precise Guidance for Accelerating Large {VLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19814--19824},
}
SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models: Jaerin Lee,

Daniel Sungho Jung,

Kanggeon Lee,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jaerin and Jung, Daniel Sungho and Lee, Kanggeon and Lee, Kyoung Mu},
  title     = {{SemanticDraw}: Towards Real-Time Interactive Content Creation from Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13021--13030},
}
Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance: Dimitrios Gerogiannis,

Foivos Paraperas Papantoniou,

Rolandos Alexandros Potamias,

Alexandros Lattas,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gerogiannis_2025_CVPR,
  author    = {Gerogiannis, Dimitrios and Papantoniou, Foivos Paraperas and Potamias, Rolandos Alexandros and Lattas, Alexandros and Zafeiriou, Stefanos},
  title     = {{Arc2Avatar}: Generating Expressive {3D} Avatars from a Single Image via {ID} Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10770--10782},
}
Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes: Hyeonggon Ryu,

Seongyu Kim,

Joon Son Chung,

Arda Senocak; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2025_CVPR,
  author    = {Ryu, Hyeonggon and Kim, Seongyu and Chung, Joon Son and Senocak, Arda},
  title     = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13540--13549},
}
Structure from Collision: Takuhiro Kaneko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kaneko_2025_CVPR,
  author    = {Kaneko, Takuhiro},
  title     = {Structure from Collision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16314--16324},
}
Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation: Henghui Du,

Guangyao Li,

Chang Zhou,

Chunjie Zhang,

Alan Zhao,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Henghui and Li, Guangyao and Zhou, Chang and Zhang, Chunjie and Zhao, Alan and Hu, Di},
  title     = {Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18804--18814},
}
Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection: Le Yang,

Ziwei Zheng,

Boxu Chen,

Zhengyu Zhao,

Chenhao Lin,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Le and Zheng, Ziwei and Chen, Boxu and Zhao, Zhengyu and Lin, Chenhao and Shen, Chao},
  title     = {Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via {HalluSpace} Projection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14635--14645},
}
OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining: Bingzhi Chen,

Sisi Fu,

Xiaocheng Fang,

Jieyi Cai,

Boya Zhang,

Minhua Lu,

Yishu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Bingzhi and Fu, Sisi and Fang, Xiaocheng and Cai, Jieyi and Zhang, Boya and Lu, Minhua and Liu, Yishu},
  title     = {{OralXrays-9}: Towards Hospital-Scale Panoramic {X-ray} Anomaly Detection via Personalized Multi-Object Query-Aware Mining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15570--15579},
}
SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving: Georg Hess,

Carl Lindström,

Maryam Fatemi,

Christoffer Petersson,

Lennart Svensson; [pdf] [supp]
[bibtex]
@InProceedings{Hess_2025_CVPR,
  author    = {Hess, Georg and Lindstr{\"o}m, Carl and Fatemi, Maryam and Petersson, Christoffer and Svensson, Lennart},
  title     = {{SplatAD}: Real-Time Lidar and Camera Rendering with {3D} {Gaussian} Splatting for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11982--11992},
}
Audio-Visual Instance Segmentation: Ruohao Guo,

Xianghua Ying,

Yaru Chen,

Dantong Niu,

Guangyao Li,

Liao Qu,

Yanyu Qi,

Jinxing Zhou,

Bowei Xing,

Wenzhen Yue,

Ji Shi,

Qixun Wang,

Peiliang Zhang,

Buwen Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Ruohao and Ying, Xianghua and Chen, Yaru and Niu, Dantong and Li, Guangyao and Qu, Liao and Qi, Yanyu and Zhou, Jinxing and Xing, Bowei and Yue, Wenzhen and Shi, Ji and Wang, Qixun and Zhang, Peiliang and Liang, Buwen},
  title     = {Audio-Visual Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13550--13560},
}
UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation: Yinqiao Wang,

Hao Xu,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yinqiao and Xu, Hao and Heng, Pheng-Ann and Fu, Chi-Wing},
  title     = {{UniHOPE}: A Unified Approach for Hand-Only and Hand-Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12231--12241},
}
RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression: Uri Gadot,

Assaf Shocher,

Shie Mannor,

Gal Chechik,

Assaf Hallak; [pdf] [supp]
[bibtex]
@InProceedings{Gadot_2025_CVPR,
  author    = {Gadot, Uri and Shocher, Assaf and Mannor, Shie and Chechik, Gal and Hallak, Assaf},
  title     = {{RL-RC-DoT}: A Block-level {RL} agent for Task-Aware Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12533--12542},
}
Recognition-Synergistic Scene Text Editing: Zhengyao Fang,

Pengyuan Lyu,

Jingjing Wu,

Chengquan Zhang,

Jun Yu,

Guangming Lu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Zhengyao and Lyu, Pengyuan and Wu, Jingjing and Zhang, Chengquan and Yu, Jun and Lu, Guangming and Pei, Wenjie},
  title     = {Recognition-Synergistic Scene Text Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13104--13113},
}
WildAvatar: Learning In-the-wild 3D Avatars from the Web: Zihao Huang,

Shoukang Hu,

Guangcong Wang,

Tianqi Liu,

Yuhang Zang,

Zhiguo Cao,

Wei Li,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zihao and Hu, Shoukang and Wang, Guangcong and Liu, Tianqi and Zang, Yuhang and Cao, Zhiguo and Li, Wei and Liu, Ziwei},
  title     = {{WildAvatar}: Learning In-the-wild {3D} Avatars from the Web},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15963--15975},
}
Rectified Diffusion Guidance for Conditional Generation: Mengfei Xia,

Nan Xue,

Yujun Shen,

Ran Yi,

Tieliang Gong,

Yong-Jin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Mengfei and Xue, Nan and Shen, Yujun and Yi, Ran and Gong, Tieliang and Liu, Yong-Jin},
  title     = {Rectified Diffusion Guidance for Conditional Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13371--13380},
}
IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments: Can Zhang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Can and Lee, Gim Hee},
  title     = {{IAAO}: Interactive Affordance Learning for Articulated Objects in {3D} Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12132--12142},
}
RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler: Xin Ding,

Lei Yu,

Xin Li,

Zhijun Tu,

Hanting Chen,

Jie Hu,

Zhibo Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Xin and Yu, Lei and Li, Xin and Tu, Zhijun and Chen, Hanting and Hu, Jie and Chen, Zhibo},
  title     = {{RaSS}: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12923--12933},
}
OSV: One Step is Enough for High-Quality Image to Video Generation: Xiaofeng Mao,

Zhengkai Jiang,

Fu-yun Wang,

Jiangning Zhang,

Hao Chen,

Mingmin Chi,

Yabiao Wang,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Xiaofeng and Jiang, Zhengkai and Wang, Fu-yun and Zhang, Jiangning and Chen, Hao and Chi, Mingmin and Wang, Yabiao and Luo, Wenhan},
  title     = {{OSV}: One Step is Enough for High-Quality Image to Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12585--12594},
}
Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval: Siyuan Duan,

Yuan Sun,

Dezhong Peng,

Zheng Liu,

Xiaomin Song,

Peng Hu; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Siyuan and Sun, Yuan and Peng, Dezhong and Liu, Zheng and Song, Xiaomin and Hu, Peng},
  title     = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20747--20756},
}
GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration: Yuchen Sun,

Shanhui Zhao,

Tao Yu,

Hao Wen,

Samith Va,

Mengwei Xu,

Yuanchun Li,

Chongyang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuchen and Zhao, Shanhui and Yu, Tao and Wen, Hao and Va, Samith and Xu, Mengwei and Li, Yuanchun and Zhang, Chongyang},
  title     = {{GUI-Xplore}: Empowering Generalizable {GUI} Agents with One Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19477--19486},
}
Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning: Tian Liu,

Huixin Zhang,

Shubham Parashar,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Tian and Zhang, Huixin and Parashar, Shubham and Kong, Shu},
  title     = {Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15086--15097},
}
RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration: Yuanjian Qiao,

Mingwen Shao,

Lingzhuang Meng,

Kai Xu; [pdf] [supp]
[bibtex]
@InProceedings{Qiao_2025_CVPR,
  author    = {Qiao, Yuanjian and Shao, Mingwen and Meng, Lingzhuang and Xu, Kai},
  title     = {{RestorGS}: Depth-aware {Gaussian} Splatting for Efficient {3D} Scene Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11177--11186},
}
4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion: Chaoyang Wang,

Peiye Zhuang,

Tuan Duc Ngo,

Willi Menapace,

Aliaksandr Siarohin,

Michael Vasilkovsky,

Ivan Skorokhodov,

Sergey Tulyakov,

Peter Wonka,

Hsin-Ying Lee; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Chaoyang and Zhuang, Peiye and Ngo, Tuan Duc and Menapace, Willi and Siarohin, Aliaksandr and Vasilkovsky, Michael and Skorokhodov, Ivan and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying},
  title     = {{4Real-Video}: Learning Generalizable Photo-Realistic {4D} Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17723--17732},
}
Z-Magic: Zero-shot Multiple Attributes Guided Image Creator: Yingying Deng,

Xiangyu He,

Fan Tang,

Weiming Dong; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming},
  title     = {{Z-Magic}: Zero-shot Multiple Attributes Guided Image Creator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18390--18400},
}
On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach: Baoshun Tong,

Hanjiang Lai,

Yan Pan,

Jian Yin; [pdf]
[bibtex]
@InProceedings{Tong_2025_CVPR,
  author    = {Tong, Baoshun and Lai, Hanjiang and Pan, Yan and Yin, Jian},
  title     = {On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19921--19930},
}
Towards General Visual-Linguistic Face Forgery Detection: Ke Sun,

Shen Chen,

Taiping Yao,

Ziyin Zhou,

Jiayi Ji,

Xiaoshuai Sun,

Chia-Wen Lin,

Rongrong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Ke and Chen, Shen and Yao, Taiping and Zhou, Ziyin and Ji, Jiayi and Sun, Xiaoshuai and Lin, Chia-Wen and Ji, Rongrong},
  title     = {Towards General Visual-Linguistic Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19576--19586},
}
Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts: Feng Liang,

Haoyu Ma,

Zecheng He,

Tingbo Hou,

Ji Hou,

Kunpeng Li,

Xiaoliang Dai,

Felix Juefei-Xu,

Samaneh Azadi,

Animesh Sinha,

Peizhao Zhang,

Peter Vajda,

Diana Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Feng and Ma, Haoyu and He, Zecheng and Hou, Tingbo and Hou, Ji and Li, Kunpeng and Dai, Xiaoliang and Juefei-Xu, Felix and Azadi, Samaneh and Sinha, Animesh and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana},
  title     = {{Movie Weaver}: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13146--13156},
}
LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos: Tiantian Geng,

Jinrui Zhang,

Qingni Wang,

Teng Wang,

Jinming Duan,

Feng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Tiantian and Zhang, Jinrui and Wang, Qingni and Wang, Teng and Duan, Jinming and Zheng, Feng},
  title     = {{LongVALE}: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18959--18969},
}
Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key: Zhihe Yang,

Xufang Luo,

Dongqi Han,

Yunjian Xu,

Dongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhihe and Luo, Xufang and Han, Dongqi and Xu, Yunjian and Li, Dongsheng},
  title     = {Mitigating Hallucinations in Large Vision-Language Models via {DPO}: On-Policy Data Hold the Key},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10610--10620},
}
Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion: Emiel Hoogeboom,

Thomas Mensink,

Jonathan Heek,

Kay Lamerigts,

Ruiqi Gao,

Tim Salimans; [pdf] [supp]
[bibtex]
@InProceedings{Hoogeboom_2025_CVPR,
  author    = {Hoogeboom, Emiel and Mensink, Thomas and Heek, Jonathan and Lamerigts, Kay and Gao, Ruiqi and Salimans, Tim},
  title     = {Simpler Diffusion: 1.5 {FID} on {ImageNet512} with Pixel-space Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18062--18071},
}
STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection: Divya Velayudhan,

Abdelfatah Ahmed,

Mohamad Alansari,

Neha Gour,

Abderaouf Behouch,

Taimur Hassan,

Syed Talal Wasim,

Nabil Maalej,

Muzammal Naseer,

Juergen Gall,

Mohammed Bennamoun,

Ernesto Damiani,

Naoufel Werghi; [pdf] [supp]
[bibtex]
@InProceedings{Velayudhan_2025_CVPR,
  author    = {Velayudhan, Divya and Ahmed, Abdelfatah and Alansari, Mohamad and Gour, Neha and Behouch, Abderaouf and Hassan, Taimur and Wasim, Syed Talal and Maalej, Nabil and Naseer, Muzammal and Gall, Juergen and Bennamoun, Mohammed and Damiani, Ernesto and Werghi, Naoufel},
  title     = {{STING-BEE}: Towards Vision-Language Model for Real-World {X-ray} Baggage Security Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20767--20777},
}
Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability: Lei Wang,

Senmao Li,

Fei Yang,

Jianye Wang,

Ziheng Zhang,

Yuhan Liu,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lei and Li, Senmao and Yang, Fei and Wang, Jianye and Zhang, Ziheng and Liu, Yuhan and Wang, Yaxing and Yang, Jian},
  title     = {Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12880--12890},
}
Complexity Experts are Task-Discriminative Learners for Any Image Restoration: Eduard Zamfir,

Zongwei Wu,

Nancy Mehta,

Yuedong Tan,

Danda Pani Paudel,

Yulun Zhang,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zamfir_2025_CVPR,
  author    = {Zamfir, Eduard and Wu, Zongwei and Mehta, Nancy and Tan, Yuedong and Paudel, Danda Pani and Zhang, Yulun and Timofte, Radu},
  title     = {Complexity Experts are Task-Discriminative Learners for Any Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12753--12763},
}
Generative Omnimatte: Learning to Decompose Video into Layers: Yao-Chih Lee,

Erika Lu,

Sarah Rumbley,

Michal Geyer,

Jia-Bin Huang,

Tali Dekel,

Forrester Cole; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Yao-Chih and Lu, Erika and Rumbley, Sarah and Geyer, Michal and Huang, Jia-Bin and Dekel, Tali and Cole, Forrester},
  title     = {Generative {Omnimatte}: Learning to Decompose Video into Layers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12522--12532},
}
5%>100%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks: Dongshuo Yin,

Leiyi Hu,

Bin Li,

Youqun Zhang,

Xue Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Dongshuo and Hu, Leiyi and Li, Bin and Zhang, Youqun and Yang, Xue},
  title     = {{5\%\ensuremath{>}100\%}: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20071--20081},
}
Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection: Wenbing Zhu,

Lidong Wang,

Ziqing Zhou,

Chengjie Wang,

Yurui Pan,

Ruoyi Zhang,

Zhuhao Chen,

Linjie Cheng,

Bin-Bin Gao,

Jiangning Zhang,

Zhenye Gan,

Yuxie Wang,

Yulong Chen,

Shuguang Qian,

Mingmin Chi,

Bo Peng,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Wenbing and Wang, Lidong and Zhou, Ziqing and Wang, Chengjie and Pan, Yurui and Zhang, Ruoyi and Chen, Zhuhao and Cheng, Linjie and Gao, Bin-Bin and Zhang, Jiangning and Gan, Zhenye and Wang, Yuxie and Chen, Yulong and Qian, Shuguang and Chi, Mingmin and Peng, Bo and Ma, Lizhuang},
  title     = {{Real-IAD D3}: A Real-World {2D/Pseudo-3D/3D} Dataset for Industrial Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15214--15223},
}
ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks: Mohamed Afane,

Gabrielle Ebbrecht,

Ying Wang,

Juntao Chen,

Junaid Farooq; [pdf] [arXiv]
[bibtex]
@InProceedings{Afane_2025_CVPR,
  author    = {Afane, Mohamed and Ebbrecht, Gabrielle and Wang, Ying and Chen, Juntao and Farooq, Junaid},
  title     = {{ATP}: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20427--20436},
}
Decoupled Motion Expression Video Segmentation: Hao Fang,

Runmin Cong,

Xiankai Lu,

Xiaofei Zhou,

Sam Kwong,

Wei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Hao and Cong, Runmin and Lu, Xiankai and Zhou, Xiaofei and Kwong, Sam and Zhang, Wei},
  title     = {Decoupled Motion Expression Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13821--13831},
}
K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs: Ziheng Ouyang,

Zhen Li,

Qibin Hou; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2025_CVPR,
  author    = {Ouyang, Ziheng and Li, Zhen and Hou, Qibin},
  title     = {{K-LoRA}: Unlocking Training-Free Fusion of Any Subject and Style {LoRAs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13041--13050},
}
WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model: Zongjian Li,

Bin Lin,

Yang Ye,

Liuhan Chen,

Xinhua Cheng,

Shenghai Yuan,

Li Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zongjian and Lin, Bin and Ye, Yang and Chen, Liuhan and Cheng, Xinhua and Yuan, Shenghai and Yuan, Li},
  title     = {{WF-VAE}: Enhancing Video {VAE} by Wavelet-Driven Energy Flow for Latent Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17778--17788},
}
XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?: Fengxiang Wang,

Hongzhen Wang,

Zonghao Guo,

Di Wang,

Yulin Wang,

Mingshuo Chen,

Qiang Ma,

Long Lan,

Wenjing Yang,

Jing Zhang,

Zhiyuan Liu,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Fengxiang and Wang, Hongzhen and Guo, Zonghao and Wang, Di and Wang, Yulin and Chen, Mingshuo and Ma, Qiang and Lan, Long and Yang, Wenjing and Zhang, Jing and Liu, Zhiyuan and Sun, Maosong},
  title     = {{XLRS-Bench}: Could Your Multimodal {LLMs} Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14325--14336},
}
Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks: Uranik Berisha,

Jens Mehnert,

Alexandru Paul Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berisha_2025_CVPR,
  author    = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul},
  title     = {Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20082--20091},
}
StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer: Ruojun Xu,

Weijie Xi,

XiaoDi Wang,

Yongbo Mao,

Zach Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ruojun and Xi, Weijie and Wang, XiaoDi and Mao, Yongbo and Cheng, Zach},
  title     = {{StyleSSP}: Sampling {StartPoint} Enhancement for Training-free Diffusion-based Method for Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18260--18269},
}
Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture: Kenkun Liu,

Yurong Fu,

Weihao Yuan,

Jing Lin,

Peihao Li,

Xiaodong Gu,

Lingteng Qiu,

Haoqian Wang,

Zilong Dong,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kenkun and Fu, Yurong and Yuan, Weihao and Lin, Jing and Li, Peihao and Gu, Xiaodong and Qiu, Lingteng and Wang, Haoqian and Dong, Zilong and Han, Xiaoguang},
  title     = {Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17529--17539},
}
AMO Sampler: Enhancing Text Rendering with Overshooting: Xixi Hu,

Keyang Xu,

Bo Liu,

Qiang Liu,

Hongliang Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Xixi and Xu, Keyang and Liu, Bo and Liu, Qiang and Fei, Hongliang},
  title     = {{AMO} Sampler: Enhancing Text Rendering with Overshooting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13157--13166},
}
ImViD: Immersive Volumetric Videos for Enhanced VR Engagement: Zhengxian Yang,

Shi Pan,

Shengqi Wang,

Haoxiang Wang,

Li Lin,

Guanjun Li,

Zhengqi Wen,

Borong Lin,

Jianhua Tao,

Tao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhengxian and Pan, Shi and Wang, Shengqi and Wang, Haoxiang and Lin, Li and Li, Guanjun and Wen, Zhengqi and Lin, Borong and Tao, Jianhua and Yu, Tao},
  title     = {{ImViD}: Immersive Volumetric Videos for Enhanced {VR} Engagement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16554--16564},
}
I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models: Dongnan Gui,

Xun Guo,

Wengang Zhou,

Yan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2025_CVPR,
  author    = {Gui, Dongnan and Guo, Xun and Zhou, Wengang and Lu, Yan},
  title     = {{I2VGuard}: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12595--12604},
}
Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs: Mauricio Byrd Victorica,

György Dán,

Henrik Sandberg; [pdf] [supp]
[bibtex]
@InProceedings{Victorica_2025_CVPR,
  author    = {Victorica, Mauricio Byrd and D{\'a}n, Gy{\"o}rgy and Sandberg, Henrik},
  title     = {Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against {CNNs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20360--20369},
}
OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation: Xiao Cui,

Yulei Qin,

Wengang Zhou,

Hongsheng Li,

Houqiang Li; [pdf]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Xiao and Qin, Yulei and Zhou, Wengang and Li, Hongsheng and Li, Houqiang},
  title     = {{OPTICAL}: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15245--15254},
}
Show and Segment: Universal Medical Image Segmentation via In-Context Learning: Yunhe Gao,

Di Liu,

Zhuowei Li,

Yunsheng Li,

Dongdong Chen,

Mu Zhou,

Dimitris N. Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yunhe and Liu, Di and Li, Zhuowei and Li, Yunsheng and Chen, Dongdong and Zhou, Mu and Metaxas, Dimitris N.},
  title     = {Show and Segment: Universal Medical Image Segmentation via In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20830--20840},
}
CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images: Cheng Chen,

Jiacheng Wei,

Tianrun Chen,

Chi Zhang,

Xiaofeng Yang,

Shangzhan Zhang,

Bingchen Yang,

Chuan-Sheng Foo,

Guosheng Lin,

Qixing Huang,

Fayao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Wei, Jiacheng and Chen, Tianrun and Zhang, Chi and Yang, Xiaofeng and Zhang, Shangzhan and Yang, Bingchen and Foo, Chuan-Sheng and Lin, Guosheng and Huang, Qixing and Liu, Fayao},
  title     = {{CADCrafter}: Generating Computer-Aided Design Models from Unconstrained Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11073--11082},
}
Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation: Hadi Alzayer,

Philipp Henzler,

Jonathan T. Barron,

Jia-Bin Huang,

Pratul P. Srinivasan,

Dor Verbin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2025_CVPR,
  author    = {Alzayer, Hadi and Henzler, Philipp and Barron, Jonathan T. and Huang, Jia-Bin and Srinivasan, Pratul P. and Verbin, Dor},
  title     = {Generative Multiview Relighting for {3D} Reconstruction under Extreme Illumination Variation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10933--10942},
}
DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling: Xin Xie,

Dong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Xin and Gong, Dong},
  title     = {{DyMO}: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13220--13230},
}
GENIUS: A Generative Framework for Universal Multimodal Search: Sungyeon Kim,

Xinliang Zhu,

Xiaofan Lin,

Muhammet Bastan,

Douglas Gray,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sungyeon and Zhu, Xinliang and Lin, Xiaofan and Bastan, Muhammet and Gray, Douglas and Kwak, Suha},
  title     = {{GENIUS}: A Generative Framework for Universal Multimodal Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19659--19669},
}
SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement: Mark Boss,

Zixuan Huang,

Aaryaman Vasishta,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boss_2025_CVPR,
  author    = {Boss, Mark and Huang, Zixuan and Vasishta, Aaryaman and Jampani, Varun},
  title     = {{SF3D}: Stable Fast {3D} Mesh Reconstruction with {UV-unwrapping} and Illumination Disentanglement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16240--16250},
}
Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion: Haoyu Wang,

Le Wang,

Sanping Zhou,

Jingyi Tian,

Zheng Qin,

Yabing Wang,

Gang Hua,

Wei Tang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoyu and Wang, Le and Zhou, Sanping and Tian, Jingyi and Qin, Zheng and Wang, Yabing and Hua, Gang and Tang, Wei},
  title     = {Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13350--13360},
}
EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality: Sanghyeok Lee,

Joonmyung Choi,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {{EfficientViM}: Efficient Vision {Mamba} with Hidden State Mixer based State Space Duality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14923--14933},
}
A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models: Keyu Tu,

Mengqi Huang,

Zhuowei Chen,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Keyu and Huang, Mengqi and Chen, Zhuowei and Mao, Zhendong},
  title     = {{A4A}: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18476--18485},
}
ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation: Ali Athar,

Xueqing Deng,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Athar_2025_CVPR,
  author    = {Athar, Ali and Deng, Xueqing and Chen, Liang-Chieh},
  title     = {{ViCaS}: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19023--19035},
}
A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts: Xuyi He,

Yuhui Quan,

Ruotao Xu,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Xuyi and Quan, Yuhui and Xu, Ruotao and Ji, Hui},
  title     = {A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12731--12741},
}
Towards Precise Scaling Laws for Video Diffusion Transformers: Yuanyang Yin,

Yaqi Zhao,

Mingwu Zheng,

Ke Lin,

Jiarong Ou,

Rui Chen,

Victor Shea-Jay Huang,

Jiahao Wang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Baoqun Yin,

Wentao Zhang,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Yuanyang and Zhao, Yaqi and Zheng, Mingwu and Lin, Ke and Ou, Jiarong and Chen, Rui and Huang, Victor Shea-Jay and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Yin, Baoqun and Zhang, Wentao and Gai, Kun},
  title     = {Towards Precise Scaling Laws for Video Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18155--18165},
}
SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking: Wenrui Cai,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong},
  title     = {{SPMTrack}: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16871--16881},
}
AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos: Felix Wimbauer,

Weirong Chen,

Dominik Muhle,

Christian Rupprecht,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimbauer_2025_CVPR,
  author    = {Wimbauer, Felix and Chen, Weirong and Muhle, Dominik and Rupprecht, Christian and Cremers, Daniel},
  title     = {{AnyCam}: Learning to Recover Camera Poses and Intrinsics from Casual Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16717--16727},
}
Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision: Jinnyeong Kim,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jinnyeong and Baek, Seung-Hwan},
  title     = {Pixel-aligned {RGB-NIR} Stereo Imaging and Dataset for Robot Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11482--11492},
}
Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding: Zhaoran Zhao,

Peng Lu,

Anran Zhang,

Peipei Li,

Xia Li,

Xuannan Liu,

Yang Hu,

Shiyi Chen,

Liwei Wang,

Wenhao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zhaoran and Lu, Peng and Zhang, Anran and Li, Peipei and Li, Xia and Liu, Xuannan and Hu, Yang and Chen, Shiyi and Wang, Liwei and Guo, Wenhao},
  title     = {Can Machines Understand Composition? {Dataset} and Benchmark for Photographic Image Composition Embedding and Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14411--14421}
}
Towards Efficient Foundation Model for Zero-shot Amodal Segmentation: Zhaochen Liu,

Limeng Qiao,

Xiangxiang Chu,

Lin Ma,

Tingting Jiang; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhaochen and Qiao, Limeng and Chu, Xiangxiang and Ma, Lin and Jiang, Tingting},
  title     = {Towards Efficient Foundation Model for Zero-shot Amodal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20254--20264}
}
Scaling Properties of Diffusion Models For Perceptual Tasks: Rahul Ravishankar,

Zeeshan Patel,

Jathushan Rajasegaran,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ravishankar_2025_CVPR,
  author    = {Ravishankar, Rahul and Patel, Zeeshan and Rajasegaran, Jathushan and Malik, Jitendra},
  title     = {Scaling Properties of Diffusion Models For Perceptual Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12945--12954}
}
Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation: Hao Zhu,

Yan Zhu,

Jiayu Xiao,

Tianxiang Xiao,

Yike Ma,

Yucheng Zhang,

Feng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Hao and Zhu, Yan and Xiao, Jiayu and Xiao, Tianxiang and Ma, Yike and Zhang, Yucheng and Dai, Feng},
  title     = {{Exact}: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14036--14045}
}
PolarFree: Polarization-based Reflection-Free Imaging: Mingde Yao,

Menglu Wang,

King-Man Tam,

Lingen Li,

Tianfan Xue,

Jinwei Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Mingde and Wang, Menglu and Tam, King-Man and Li, Lingen and Xue, Tianfan and Gu, Jinwei},
  title     = {{PolarFree}: Polarization-based Reflection-Free Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10890--10899}
}
Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes: Aodi Li,

Liansheng Zhuang,

Xiao Long,

Minghong Yao,

Shafei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Aodi and Zhuang, Liansheng and Long, Xiao and Yao, Minghong and Wang, Shafei},
  title     = {Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15349--15359}
}
MultimodalStudio: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities: Federico Lincetto,

Gianluca Agresti,

Mattia Rossi,

Pietro Zanuttigh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lincetto_2025_CVPR,
  author    = {Lincetto, Federico and Agresti, Gianluca and Rossi, Mattia and Zanuttigh, Pietro},
  title     = {{MultimodalStudio}: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10964--10973}
}
MuTri: Multi-view Tri-alignment for OCT to OCTA 3D Image Translation: Zhuangzhuang Chen,

Hualiang Wang,

Chubin Ou,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhuangzhuang and Wang, Hualiang and Ou, Chubin and Li, Xiaomeng},
  title     = {{MuTri}: Multi-view Tri-alignment for {OCT} to {OCTA} {3D} Image Translation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20885--20894}
}
Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference: Wenhao Shen,

Mingliang Zhou,

Yu Chen,

Xuekai Wei,

Yong Feng,

Huayan Pu,

Weijia Jia; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Wenhao and Zhou, Mingliang and Chen, Yu and Wei, Xuekai and Feng, Yong and Pu, Huayan and Jia, Weijia},
  title     = {Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17990--17999}
}
Pos3R: 6D Pose Estimation for Unseen Objects Made Easy: Weijian Deng,

Dylan Campbell,

Chunyi Sun,

Jiahao Zhang,

Shubham Kanitkar,

Matt E. Shaffer,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Weijian and Campbell, Dylan and Sun, Chunyi and Zhang, Jiahao and Kanitkar, Shubham and Shaffer, Matt E. and Gould, Stephen},
  title     = {{Pos3R}: {6D} Pose Estimation for Unseen Objects Made Easy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16818--16828}
}
RaCFormer: Towards High-Quality 3D Object Detection via Query-based Radar-Camera Fusion: Xiaomeng Chu,

Jiajun Deng,

Guoliang You,

Yifan Duan,

Houqiang Li,

Yanyong Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Duan, Yifan and Li, Houqiang and Zhang, Yanyong},
  title     = {{RaCFormer}: Towards High-Quality {3D} Object Detection via Query-based Radar-Camera Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17081--17091}
}
Understanding Multi-Task Activities from Single-Task Videos: Yuhan Shen,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yuhan and Elhamifar, Ehsan},
  title     = {Understanding Multi-Task Activities from Single-Task Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19120--19131}
}
Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement: Xinjie Li,

Ziyi Chen,

Xinlu Yu,

Iek-Heng Chu,

Peng Chang,

Jing Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinjie and Chen, Ziyi and Yu, Xinlu and Chu, Iek-Heng and Chang, Peng and Xiao, Jing},
  title     = {Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11384--11394}
}
TransPixeler: Advancing Text-to-Video Generation with Transparency: Luozhou Wang,

Yijun Li,

Zhifei Chen,

Jui-Hsien Wang,

Zhifei Zhang,

He Zhang,

Zhe Lin,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Luozhou and Li, Yijun and Chen, Zhifei and Wang, Jui-Hsien and Zhang, Zhifei and Zhang, He and Lin, Zhe and Chen, Ying-Cong},
  title     = {{TransPixeler}: Advancing Text-to-Video Generation with Transparency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18229--18239}
}
What's in the Image? A Deep-Dive into the Vision of Vision Language Models: Omri Kaduri,

Shai Bagon,

Tali Dekel; [pdf] [supp]
[bibtex]
@InProceedings{Kaduri_2025_CVPR,
  author    = {Kaduri, Omri and Bagon, Shai and Dekel, Tali},
  title     = {What's in the Image? {A} Deep-Dive into the Vision of Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14549--14558}
}
FreeSim: Toward Free-viewpoint Camera Simulation in Driving Scenes: Lue Fan,

Hao Zhang,

Qitai Wang,

Hongsheng Li,

Zhaoxiang Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Lue and Zhang, Hao and Wang, Qitai and Li, Hongsheng and Zhang, Zhaoxiang},
  title     = {{FreeSim}: Toward Free-viewpoint Camera Simulation in Driving Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12004--12014}
}
Seq2Time: Sequential Knowledge Transfer for Video LLM Temporal Grounding: Andong Deng,

Zhongpai Gao,

Anwesa Choudhuri,

Benjamin Planche,

Meng Zheng,

Bin Wang,

Terrence Chen,

Chen Chen,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Andong and Gao, Zhongpai and Choudhuri, Anwesa and Planche, Benjamin and Zheng, Meng and Wang, Bin and Chen, Terrence and Chen, Chen and Wu, Ziyan},
  title     = {{Seq2Time}: Sequential Knowledge Transfer for Video {LLM} Temporal Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13766--13775}
}
GPVK-VL: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes: Yunxuan Li,

Lei Fan,

Xiaoying Xing,

Jianxiong Zhou,

Ying Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunxuan and Fan, Lei and Xing, Xiaoying and Zhou, Jianxiong and Wu, Ying},
  title     = {{GPVK-VL}: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16728--16738}
}
Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization: Sihao Liu,

Yibo Yang,

Xiaojie Li,

David A. Clifton,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Sihao and Yang, Yibo and Li, Xiaojie and Clifton, David A. and Ghanem, Bernard},
  title     = {Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20502--20511}
}
GRAPHGPT-O: Synergistic Multimodal Comprehension and Generation on Graphs: Yi Fang,

Bowen Jin,

Jiacheng Shen,

Sirui Ding,

Qiaoyu Tan,

Jiawei Han; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yi and Jin, Bowen and Shen, Jiacheng and Ding, Sirui and Tan, Qiaoyu and Han, Jiawei},
  title     = {{GRAPHGPT-O}: Synergistic Multimodal Comprehension and Generation on Graphs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19467--19476}
}
Model Poisoning Attacks to Federated Learning via Multi-Round Consistency: Yueqi Xie,

Minghong Fang,

Neil Zhenqiang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yueqi and Fang, Minghong and Gong, Neil Zhenqiang},
  title     = {Model Poisoning Attacks to Federated Learning via Multi-Round Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15454--15463}
}
TaoAvatar: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via 3D Gaussian Splatting: Jianchuan Chen,

Jingchuan Hu,

Gaige Wang,

Zhonghua Jiang,

Tiansong Zhou,

Zhiwen Chen,

Chengfei Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jianchuan and Hu, Jingchuan and Wang, Gaige and Jiang, Zhonghua and Zhou, Tiansong and Chen, Zhiwen and Lv, Chengfei},
  title     = {{TaoAvatar}: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10723--10734}
}
Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection: Jikang Cheng,

Zhiyuan Yan,

Ying Zhang,

Li Hao,

Jiaxin Ai,

Qin Zou,

Chen Li,

Zhongyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Jikang and Yan, Zhiyuan and Zhang, Ying and Hao, Li and Ai, Jiaxin and Zou, Qin and Li, Chen and Wang, Zhongyuan},
  title     = {Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13927--13936}
}
CO-SPY: Combining Semantic and Pixel Features to Detect Synthetic Images by AI: Siyuan Cheng,

Lingjuan Lyu,

Zhenting Wang,

Xiangyu Zhang,

Vikash Sehwag; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Siyuan and Lyu, Lingjuan and Wang, Zhenting and Zhang, Xiangyu and Sehwag, Vikash},
  title     = {{CO-SPY}: Combining Semantic and Pixel Features to Detect Synthetic Images by {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13455--13465}
}
GENMANIP: LLM-driven Simulation for Generalizable Instruction-Following Manipulation: Ning Gao,

Yilun Chen,

Shuai Yang,

Xinyi Chen,

Yang Tian,

Hao Li,

Haifeng Huang,

Hanqing Wang,

Tai Wang,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Ning and Chen, Yilun and Yang, Shuai and Chen, Xinyi and Tian, Yang and Li, Hao and Huang, Haifeng and Wang, Hanqing and Wang, Tai and Pang, Jiangmiao},
  title     = {{GENMANIP}: {LLM}-driven Simulation for Generalizable Instruction-Following Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12187--12198}
}
Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation: Byung Hyun Lee,

Sungjin Lim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Byung Hyun and Lim, Sungjin and Chun, Se Young},
  title     = {Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18596--18606}
}
Camera Resection from Known Line Pencils and a Radially Distorted Scanline: Juan C. Dibene,

Enrique Dunn; [pdf]
[bibtex]
@InProceedings{Dibene_2025_CVPR,
  author    = {Dibene, Juan C. and Dunn, Enrique},
  title     = {Camera Resection from Known Line Pencils and a Radially Distorted Scanline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15843--15851}
}
SPC-GS: Gaussian Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs: Guibiao Liao,

Qing Li,

Zhenyu Bao,

Guoping Qiu,

Kanglin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Guibiao and Li, Qing and Bao, Zhenyu and Qiu, Guoping and Liu, Kanglin},
  title     = {{SPC-GS}: {Gaussian} Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11264--11274}
}
M3amba: Memory Mamba is All You Need for Whole Slide Image Classification: Tingting Zheng,

Kui Jiang,

Yi Xiao,

Sicheng Zhao,

Hongxun Yao; [pdf]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Tingting and Jiang, Kui and Xiao, Yi and Zhao, Sicheng and Yao, Hongxun},
  title     = {{M3amba}: Memory {Mamba} is All You Need for Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15601--15610}
}
Redefining <Creative> in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation: Fu Feng,

Yucheng Xie,

Xu Yang,

Jing Wang,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Yang, Xu and Wang, Jing and Geng, Xin},
  title     = {Redefining \ensuremath{<}{Creative}\ensuremath{>} in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18444--18454}
}
Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection: Jia Guo,

Shuai Lu,

Weihang Zhang,

Fang Chen,

Huiqi Li,

Hongen Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Jia and Lu, Shuai and Zhang, Weihang and Chen, Fang and Li, Huiqi and Liao, Hongen},
  title     = {{Dinomaly}: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20405--20415}
}
Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization: Feifei Li,

Mi Zhang,

Yiming Sun,

Min Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Feifei and Zhang, Mi and Sun, Yiming and Yang, Min},
  title     = {Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13252--13262}
}
MirrorVerse: Pushing Diffusion Models to Realistically Reflect the World: Ankit Dhiman,

Manan Shah,

R Venkatesh Babu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhiman_2025_CVPR,
  author    = {Dhiman, Ankit and Shah, Manan and Babu, R Venkatesh},
  title     = {{MirrorVerse}: Pushing Diffusion Models to Realistically Reflect the World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11239--11249}
}
EAP-GS: Efficient Augmentation of Pointcloud for 3D Gaussian Splatting in Few-shot Scene Reconstruction: Dongrui Dai,

Yuxiang Xing; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Dongrui and Xing, Yuxiang},
  title     = {{EAP-GS}: Efficient Augmentation of Pointcloud for {3D} {Gaussian} Splatting in Few-shot Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16498--16507}
}
Empowering Large Language Models with 3D Situation Awareness: Zhihao Yuan,

Yibo Peng,

Jinke Ren,

Yinghong Liao,

Yatong Han,

Chun-Mei Feng,

Hengshuang Zhao,

Guanbin Li,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Zhihao and Peng, Yibo and Ren, Jinke and Liao, Yinghong and Han, Yatong and Feng, Chun-Mei and Zhao, Hengshuang and Li, Guanbin and Cui, Shuguang and Li, Zhen},
  title     = {Empowering Large Language Models with {3D} Situation Awareness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19435--19445}
}
EchoTraffic: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights: Zhenghao Xing,

Hao Chen,

Binzhu Xie,

Jiaqi Xu,

Ziyu Guo,

Xuemiao Xu,

Jianye Hao,

Chi-Wing Fu,

Xiaowei Hu,

Pheng-Ann Heng; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Zhenghao and Chen, Hao and Xie, Binzhu and Xu, Jiaqi and Guo, Ziyu and Xu, Xuemiao and Hao, Jianye and Fu, Chi-Wing and Hu, Xiaowei and Heng, Pheng-Ann},
  title     = {{EchoTraffic}: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19098--19108}
}
Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline: Junlong Cheng,

Bin Fu,

Jin Ye,

Guoan Wang,

Tianbin Li,

Haoyu Wang,

Ruoyu Li,

He Yao,

Junren Cheng,

Jingwen Li,

Yanzhou Su,

Min Zhu,

Junjun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Junlong and Fu, Bin and Ye, Jin and Wang, Guoan and Li, Tianbin and Wang, Haoyu and Li, Ruoyu and Yao, He and Cheng, Junren and Li, Jingwen and Su, Yanzhou and Zhu, Min and He, Junjun},
  title     = {Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20841--20851}
}
GigaHands: A Massive Annotated Dataset of Bimanual Hand Activities: Rao Fu,

Dingxi Zhang,

Alex Jiang,

Wanjia Fu,

Austin Funk,

Daniel Ritchie,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Rao and Zhang, Dingxi and Jiang, Alex and Fu, Wanjia and Funk, Austin and Ritchie, Daniel and Sridhar, Srinath},
  title     = {{GigaHands}: A Massive Annotated Dataset of Bimanual Hand Activities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17461--17474}
}
AutoSSVH: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing: Niu Lian,

Jun Li,

Jinpeng Wang,

Ruisheng Luo,

Yaowei Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Lian_2025_CVPR,
  author    = {Lian, Niu and Li, Jun and Wang, Jinpeng and Luo, Ruisheng and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin},
  title     = {{AutoSSVH}: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18881--18890}
}
FrugalNeRF: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors: Chin-Yang Lin,

Chung-Ho Wu,

Chang-Han Yeh,

Shih-Han Yen,

Cheng Sun,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Chin-Yang and Wu, Chung-Ho and Yeh, Chang-Han and Yen, Shih-Han and Sun, Cheng and Liu, Yu-Lun},
  title     = {{FrugalNeRF}: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11227--11238}
}
Pioneering 4-Bit FP Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning: Maosen Zhao,

Pengtao Chen,

Chong Yu,

Yan Wen,

Xudong Tan,

Tao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Maosen and Chen, Pengtao and Yu, Chong and Wen, Yan and Tan, Xudong and Chen, Tao},
  title     = {Pioneering 4-Bit {FP} Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18134--18143}
}
CompGS: Unleashing 2D Compositionality for Compositional Text-to-3D via Dynamically Optimizing 3D Gaussians: Chongjian Ge,

Chenfeng Xu,

Yuanfeng Ji,

Chensheng Peng,

Masayoshi Tomizuka,

Ping Luo,

Mingyu Ding,

Varun Jampani,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Chongjian and Xu, Chenfeng and Ji, Yuanfeng and Peng, Chensheng and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu and Jampani, Varun and Zhan, Wei},
  title     = {{CompGS}: Unleashing {2D} Compositionality for Compositional Text-to-{3D} via Dynamically Optimizing {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18509--18520}
}
FIRE: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error: Beilin Chu,

Xuan Xu,

Xin Wang,

Yufei Zhang,

Weike You,

Linna Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Beilin and Xu, Xuan and Wang, Xin and Zhang, Yufei and You, Weike and Zhou, Linna},
  title     = {{FIRE}: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12830--12839}
}
Assessing and Learning Alignment of Unimodal Vision and Language Models: Le Zhang,

Qian Yang,

Aishwarya Agrawal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Le and Yang, Qian and Agrawal, Aishwarya},
  title     = {Assessing and Learning Alignment of Unimodal Vision and Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14604--14614}
}
Action Detail Matters: Refining Video Recognition with Local Action Queries: Mengmeng Wang,

Zeyi Huang,

Xiangjie Kong,

Guojiang Shen,

Guang Dai,

Jingdong Wang,

Yong Liu; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Mengmeng and Huang, Zeyi and Kong, Xiangjie and Shen, Guojiang and Dai, Guang and Wang, Jingdong and Liu, Yong},
  title     = {Action Detail Matters: Refining Video Recognition with Local Action Queries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19132--19142}
}
Generative Map Priors for Collaborative BEV Semantic Segmentation: Jiahui Fu,

Yue Gong,

Luting Wang,

Shifeng Zhang,

Xu Zhou,

Si Liu; [pdf]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiahui and Gong, Yue and Wang, Luting and Zhang, Shifeng and Zhou, Xu and Liu, Si},
  title     = {Generative Map Priors for Collaborative {BEV} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11919--11928}
}
Coherent 3D Portrait Video Reconstruction via Triplane Fusion: Shengze Wang,

Xueting Li,

Chao Liu,

Matthew Chan,

Michael Stengel,

Henry Fuchs,

Shalini De Mello,

Koki Nagano; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Xueting and Liu, Chao and Chan, Matthew and Stengel, Michael and Fuchs, Henry and De Mello, Shalini and Nagano, Koki},
  title     = {Coherent {3D} Portrait Video Reconstruction via Triplane Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10712--10722}
}
ManiVideo: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping: Youxin Pang,

Ruizhi Shao,

Jiajun Zhang,

Hanzhang Tu,

Yun Liu,

Boyao Zhou,

Hongwen Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Youxin and Shao, Ruizhi and Zhang, Jiajun and Tu, Hanzhang and Liu, Yun and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin},
  title     = {{ManiVideo}: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12209--12219}
}
FedCS: Coreset Selection for Federated Learning: Chenhe Hao,

Weiying Xie,

Daixun Li,

Haonan Qin,

Hangyu Ye,

Leyuan Fang,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Chenhe and Xie, Weiying and Li, Daixun and Qin, Haonan and Ye, Hangyu and Fang, Leyuan and Li, Yunsong},
  title     = {{FedCS}: Coreset Selection for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15434--15443}
}
Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening: Yinghui Xing,

Litao Qu,

Shizhou Zhang,

Di Xu,

Yingkun Yang,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Yinghui and Qu, Litao and Zhang, Shizhou and Xu, Di and Yang, Yingkun and Zhang, Yanning},
  title     = {Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12658--12668}
}
OmniMMI: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts: Yuxuan Wang,

Yueqian Wang,

Bo Chen,

Tong Wu,

Dongyan Zhao,

Zilong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuxuan and Wang, Yueqian and Chen, Bo and Wu, Tong and Zhao, Dongyan and Zheng, Zilong},
  title     = {{OmniMMI}: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18925--18935}
}
SOAP: Vision-Centric 3D Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection: Hyo-Jun Lee,

Yeong Jun Koh,

Hanul Kim,

Hyunseop Kim,

Yonguk Lee,

Jinu Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Hyo-Jun and Koh, Yeong Jun and Kim, Hanul and Kim, Hyunseop and Lee, Yonguk and Lee, Jinu},
  title     = {{SOAP}: Vision-Centric {3D} Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17145--17154}
}
SimVS: Simulating World Inconsistencies for Robust View Synthesis: Alex Trevithick,

Roni Paiss,

Philipp Henzler,

Dor Verbin,

Rundi Wu,

Hadi Alzayer,

Ruiqi Gao,

Ben Poole,

Jonathan T. Barron,

Aleksander Holynski,

Ravi Ramamoorthi,

Pratul P. Srinivasan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Trevithick_2025_CVPR,
  author    = {Trevithick, Alex and Paiss, Roni and Henzler, Philipp and Verbin, Dor and Wu, Rundi and Alzayer, Hadi and Gao, Ruiqi and Poole, Ben and Barron, Jonathan T. and Holynski, Aleksander and Ramamoorthi, Ravi and Srinivasan, Pratul P.},
  title     = {{SimVS}: Simulating World Inconsistencies for Robust View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16464--16474}
}
From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective: Chen Zhao,

Zhizhou Chen,

Yunzhe Xu,

Enxuan Gu,

Jian Li,

Zili Yi,

Qian Wang,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Chen and Chen, Zhizhou and Xu, Yunzhe and Gu, Enxuan and Li, Jian and Yi, Zili and Wang, Qian and Yang, Jian and Tai, Ying},
  title     = {From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17935--17946}
}
COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training: Sanghwan Kim,

Rui Xiao,

Mariana-Iuliana Georgescu,

Stephan Alaniz,

Zeynep Akata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sanghwan and Xiao, Rui and Georgescu, Mariana-Iuliana and Alaniz, Stephan and Akata, Zeynep},
  title     = {{COSMOS}: Cross-Modality Self-Distillation for Vision Language Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14690--14700}
}
Lifting Motion to the 3D World via 2D Diffusion: Jiaman Li,

C. Karen Liu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Jiaman and Liu, C. Karen and Wu, Jiajun},
    title     = {Lifting Motion to the {3D} World via {2D} Diffusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17518--17528},
}
TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models: Xin Wang,

Kai Chen,

Jiaming Zhang,

Jingjing Chen,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Xin and Chen, Kai and Zhang, Jiaming and Chen, Jingjing and Ma, Xingjun},
    title     = {{TAPT}: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19910--19920},
}
Active Data Curation Effectively Distills Large-Scale Multimodal Models: Vishaal Udandarao,

Nikhil Parthasarathy,

Muhammad Ferjad Naeem,

Talfan Evans,

Samuel Albanie,

Federico Tombari,

Yongqin Xian,

Alessio Tonioni,

Olivier J. Henaff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Udandarao_2025_CVPR,
    author    = {Udandarao, Vishaal and Parthasarathy, Nikhil and Naeem, Muhammad Ferjad and Evans, Talfan and Albanie, Samuel and Tombari, Federico and Xian, Yongqin and Tonioni, Alessio and Henaff, Olivier J.},
    title     = {Active Data Curation Effectively Distills Large-Scale Multimodal Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14422--14437},
}
SCSA: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer: Chunnan Shang,

Zhizhong Wang,

Hongwei Wang,

Xiangming Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shang_2025_CVPR,
    author    = {Shang, Chunnan and Wang, Zhizhong and Wang, Hongwei and Meng, Xiangming},
    title     = {{SCSA}: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13051--13060},
}
Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices: Tianyi Wang,

Zichen Wang,

Cong Wang,

Yuanchao Shu,

Ruilong Deng,

Peng Cheng,

Jiming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Tianyi and Wang, Zichen and Wang, Cong and Shu, Yuanchao and Deng, Ruilong and Cheng, Peng and Chen, Jiming},
    title     = {Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19230--19240},
}
SAM2Object: Consolidating View Consistency via SAM2 for Zero-Shot 3D Instance Segmentation: Jihuai Zhao,

Junbao Zhuo,

Jiansheng Chen,

Huimin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Jihuai and Zhuo, Junbao and Chen, Jiansheng and Ma, Huimin},
    title     = {{SAM2Object}: Consolidating View Consistency via {SAM2} for Zero-Shot {3D} Instance Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19325--19334},
}
CDI: Copyrighted Data Identification in Diffusion Models: Jan Dubiński,

Antoni Kowalczuk,

Franziska Boenisch,

Adam Dziedzic; [pdf] [supp]
[bibtex]
@InProceedings{Dubinski_2025_CVPR,
    author    = {Dubi{\'n}ski, Jan and Kowalczuk, Antoni and Boenisch, Franziska and Dziedzic, Adam},
    title     = {{CDI}: Copyrighted Data Identification in Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18674--18684},
}
CRISP: Object Pose and Shape Estimation with Test-Time Adaptation: Jingnan Shi,

Rajat Talak,

Harry Zhang,

David Jin,

Luca Carlone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
    author    = {Shi, Jingnan and Talak, Rajat and Zhang, Harry and Jin, David and Carlone, Luca},
    title     = {{CRISP}: Object Pose and Shape Estimation with Test-Time Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11644--11653},
}
Creating Your Editable 3D Photorealistic Avatar with Tetrahedron-constrained Gaussian Splatting: Hanxi Liu,

Yifang Men,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui},
    title     = {Creating Your Editable {3D} Photorealistic Avatar with Tetrahedron-constrained {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15976--15986},
}
Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations: Ahmad Rahimi,

Po-Chien Luan,

Yuejiang Liu,

Frano Rajič,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahimi_2025_CVPR,
    author    = {Rahimi, Ahmad and Luan, Po-Chien and Liu, Yuejiang and Raji{\v{c}}, Frano and Alahi, Alexandre},
    title     = {Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17271--17281},
}
Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning: Juntae Lee,

Munawar Hayat,

Sungrack Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Juntae and Hayat, Munawar and Yun, Sungrack},
    title     = {Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15329--15338},
}
PerLA: Perceptive 3D Language Assistant: Guofeng Mei,

Wei Lin,

Luigi Riz,

Yujiao Wu,

Fabio Poiesi,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
    author    = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Poiesi, Fabio and Wang, Yiming},
    title     = {{PerLA}: Perceptive {3D} Language Assistant},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14369--14379},
}
PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation: Qiyao Xue,

Xiangyu Yin,

Boyuan Yang,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
    author    = {Xue, Qiyao and Yin, Xiangyu and Yang, Boyuan and Gao, Wei},
    title     = {{PhyT2V}: {LLM}-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18826--18836},
}
Mask^2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation: Tianhao Qi,

Jianlong Yuan,

Wanquan Feng,

Shancheng Fang,

Jiawei Liu,

SiYu Zhou,

Qian He,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
    author    = {Qi, Tianhao and Yuan, Jianlong and Feng, Wanquan and Fang, Shancheng and Liu, Jiawei and Zhou, SiYu and He, Qian and Xie, Hongtao and Zhang, Yongdong},
    title     = {{Mask$^2$DiT}: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18837--18846},
}
JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba: Xiaoyong Lu,

Songlin Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
    author    = {Lu, Xiaoyong and Du, Songlin},
    title     = {{JamMa}: Ultra-lightweight Local Feature Matching with Joint {Mamba}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14934--14943},
}
DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models: Keda Tao,

Can Qin,

Haoxuan You,

Yang Sui,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_CVPR,
    author    = {Tao, Keda and Qin, Can and You, Haoxuan and Sui, Yang and Wang, Huan},
    title     = {{DyCoke}: Dynamic Compression of Tokens for Fast Video Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18992--19001},
}
MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps: Valentin Gabeff,

Haozhe Qi,

Brendan Flaherty,

Gencer Sumbul,

Alexander Mathis,

Devis Tuia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gabeff_2025_CVPR,
    author    = {Gabeff, Valentin and Qi, Haozhe and Flaherty, Brendan and Sumbul, Gencer and Mathis, Alexander and Tuia, Devis},
    title     = {{MammAlps}: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the {Swiss} {Alps}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13854--13864},
}
Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling: Yinuo Wang,

Yanbo Fan,

Xuan Wang,

Guo Yu,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Yu, Guo and Wang, Fei},
    title     = {Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15885--15895},
}
SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens: Chi Su,

Xiaoxuan Ma,

Jiajun Su,

Yizhou Wang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_CVPR,
    author    = {Su, Chi and Ma, Xiaoxuan and Su, Jiajun and Wang, Yizhou},
    title     = {{SAT-HMR}: Real-Time Multi-Person {3D} Mesh Estimation via Scale-Adaptive Tokens},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16796--16806},
}
UniScene: Unified Occupancy-centric Driving Scene Generation: Bohan Li,

Jiazhe Guo,

Hongsi Liu,

Yingshuang Zou,

Yikang Ding,

Xiwu Chen,

Hu Zhu,

Feiyang Tan,

Chi Zhang,

Tiancai Wang,

Shuchang Zhou,

Li Zhang,

Xiaojuan Qi,

Hao Zhao,

Mu Yang,

Wenjun Zeng,

Xin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Bohan and Guo, Jiazhe and Liu, Hongsi and Zou, Yingshuang and Ding, Yikang and Chen, Xiwu and Zhu, Hu and Tan, Feiyang and Zhang, Chi and Wang, Tiancai and Zhou, Shuchang and Zhang, Li and Qi, Xiaojuan and Zhao, Hao and Yang, Mu and Zeng, Wenjun and Jin, Xin},
    title     = {{UniScene}: Unified Occupancy-centric Driving Scene Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11971--11981},
}
Learning from Streaming Video with Orthogonal Gradients: Tengda Han,

Dilara Gokay,

Joseph Heyward,

Chuhan Zhang,

Daniel Zoran,

Viorica Patraucean,

Joao Carreira,

Dima Damen,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
    author    = {Han, Tengda and Gokay, Dilara and Heyward, Joseph and Zhang, Chuhan and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Damen, Dima and Zisserman, Andrew},
    title     = {Learning from Streaming Video with Orthogonal Gradients},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13651--13660},
}
Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers: Quentin Guimard,

Moreno D'Incà,

Massimiliano Mancini,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Guimard_2025_CVPR,
    author    = {Guimard, Quentin and D'Inc{\`a}, Moreno and Mancini, Massimiliano and Ricci, Elisa},
    title     = {Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15151--15161},
}
An Image-like Diffusion Method for Human-Object Interaction Detection: Xiaofei Hui,

Haoxuan Qu,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hui_2025_CVPR,
    author    = {Hui, Xiaofei and Qu, Haoxuan and Rahmani, Hossein and Liu, Jun},
    title     = {An Image-like Diffusion Method for Human-Object Interaction Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14002--14012},
}
COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting: Jiaxin Zhang,

Junjun Jiang,

Youyu Chen,

Kui Jiang,

Xianming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Jiaxin and Jiang, Junjun and Chen, Youyu and Jiang, Kui and Liu, Xianming},
    title     = {{COB-GS}: Clear Object Boundaries in {3DGS} Segmentation Based on Boundary-Adaptive {Gaussian} Splitting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19335--19344},
}
PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization: Dong Kyu Cho,

Inwoo Hwang,

Sanghack Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
    author    = {Cho, Dong Kyu and Hwang, Inwoo and Lee, Sanghack},
    title     = {{PEER} Pressure: Model-to-Model Regularization for Single Source Domain Generalization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15360--15370},
}
Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction: Xiaohan Qin,

Xiaoxing Wang,

Junchi Yan; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR,
    author    = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi},
    title     = {Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {20492--20501},
}
VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models: Chi-Pin Huang,

Yen-Siang Wu,

Hung-Kai Chung,

Kai-Po Chang,

Fu-En Yang,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Chi-Pin and Wu, Yen-Siang and Chung, Hung-Kai and Chang, Kai-Po and Yang, Fu-En and Wang, Yu-Chiang Frank},
    title     = {{VideoMage}: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17603--17612},
}
Compositional Caching for Training-free Open-vocabulary Attribute Detection: Marco Garosi,

Alessandro Conti,

Gaowen Liu,

Elisa Ricci,

Massimiliano Mancini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garosi_2025_CVPR,
    author    = {Garosi, Marco and Conti, Alessandro and Liu, Gaowen and Ricci, Elisa and Mancini, Massimiliano},
    title     = {Compositional Caching for Training-free Open-vocabulary Attribute Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15098--15107},
}
VI^3NR: Variance Informed Initialization for Implicit Neural Representations: Chamin Hewa Koneputugodage,

Yizhak Ben-Shabat,

Sameera Ramasinghe,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Koneputugodage_2025_CVPR,
    author    = {Koneputugodage, Chamin Hewa and Ben-Shabat, Yizhak and Ramasinghe, Sameera and Gould, Stephen},
    title     = {{VI$^3$NR}: Variance Informed Initialization for Implicit Neural Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13477--13486},
}
M-LLM Based Video Frame Selection for Efficient Video Understanding: Kai Hu,

Feng Gao,

Xiaohan Nie,

Peng Zhou,

Son Tran,

Tal Neiman,

Lingyun Wang,

Mubarak Shah,

Raffay Hamid,

Bing Yin,

Trishul Chilimbi; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Kai and Gao, Feng and Nie, Xiaohan and Zhou, Peng and Tran, Son and Neiman, Tal and Wang, Lingyun and Shah, Mubarak and Hamid, Raffay and Yin, Bing and Chilimbi, Trishul},
    title     = {{M-LLM} Based Video Frame Selection for Efficient Video Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13702--13712},
}
Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval: Mankeerat Sidhu,

Hetarth Chopra,

Ansel Blume,

Jeonghwan Kim,

Revanth Gangi Reddy,

Heng Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sidhu_2025_CVPR,
    author    = {Sidhu, Mankeerat and Chopra, Hetarth and Blume, Ansel and Kim, Jeonghwan and Reddy, Revanth Gangi and Ji, Heng},
    title     = {Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15129--15138},
}
Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation: Zhuoman Liu,

Weicai Ye,

Yan Luximon,

Pengfei Wan,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Zhuoman and Ye, Weicai and Luximon, Yan and Wan, Pengfei and Zhang, Di},
    title     = {Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for {4D} Dynamic Physical Scene Simulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11016--11025},
}
Diffusion Model is Effectively Its Own Teacher: Xinyin Ma,

Runpeng Yu,

Songhua Liu,

Gongfan Fang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Xinyin and Yu, Runpeng and Liu, Songhua and Fang, Gongfan and Wang, Xinchao},
    title     = {Diffusion Model is Effectively Its Own Teacher},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12901--12911},
}
UnCommon Objects in 3D: Xingchen Liu,

Piyush Tayal,

Jianyuan Wang,

Jesus Zarzar,

Tom Monnier,

Konstantinos Tertikas,

Jiali Duan,

Antoine Toisoul,

Jason Y. Zhang,

Natalia Neverova,

Andrea Vedaldi,

Roman Shapovalov,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xingchen and Tayal, Piyush and Wang, Jianyuan and Zarzar, Jesus and Monnier, Tom and Tertikas, Konstantinos and Duan, Jiali and Toisoul, Antoine and Zhang, Jason Y. and Neverova, Natalia and Vedaldi, Andrea and Shapovalov, Roman and Novotny, David},
    title     = {{UnCommon} Objects in {3D}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14102--14113},
}
Learning Textual Prompts for Open-World Semi-Supervised Learning: Yuxin Fan,

Junbiao Cui,

Jiye Liang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
    author    = {Fan, Yuxin and Cui, Junbiao and Liang, Jiye},
    title     = {Learning Textual Prompts for Open-World Semi-Supervised Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14756--14765},
}
LongDiff: Training-Free Long Video Generation in One Go: Zhuoling Li,

Hossein Rahmani,

Qiuhong Ke,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Zhuoling and Rahmani, Hossein and Ke, Qiuhong and Liu, Jun},
    title     = {{LongDiff}: Training-Free Long Video Generation in One Go},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17789--17798},
}
Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation: Yongkang Li,

Tianheng Cheng,

Bin Feng,

Wenyu Liu,

Xinggang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yongkang and Cheng, Tianheng and Feng, Bin and Liu, Wenyu and Wang, Xinggang},
    title     = {{Mask-Adapter}: The Devil is in the Masks for Open-Vocabulary Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14998--15008},
}
MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving: Zhiyuan Zhang,

Xiaofan Li,

Zhihao Xu,

Wenjie Peng,

Zijian Zhou,

Miaojing Shi,

Shuangping Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zhiyuan and Li, Xiaofan and Xu, Zhihao and Peng, Wenjie and Zhou, Zijian and Shi, Miaojing and Huang, Shuangping},
    title     = {{MPDrive}: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12089--12099},
}
ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way: Jiazi Bu,

Pengyang Ling,

Pan Zhang,

Tong Wu,

Xiaoyi Dong,

Yuhang Zang,

Yuhang Cao,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2025_CVPR,
    author    = {Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
    title     = {{ByTheWay}: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12999--13008},
}
Masked Point-Entity Contrast for Open-Vocabulary 3D Scene Understanding: Yan Wang,

Baoxiong Jia,

Ziyu Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yan and Jia, Baoxiong and Zhu, Ziyu and Huang, Siyuan},
    title     = {Masked Point-Entity Contrast for Open-Vocabulary {3D} Scene Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14125--14136},
}
On the Generalization of Handwritten Text Recognition Models: Carlos Garrido-Munoz,

Jorge Calvo-Zaragoza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garrido-Munoz_2025_CVPR,
    author    = {Garrido-Munoz, Carlos and Calvo-Zaragoza, Jorge},
    title     = {On the Generalization of Handwritten Text Recognition Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15275--15286},
}
InsTaG: Learning Personalized 3D Talking Head from Few-Second Video: Jiahe Li,

Jiawei Zhang,

Xiao Bai,

Jin Zheng,

Jun Zhou,

Lin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Jiahe and Zhang, Jiawei and Bai, Xiao and Zheng, Jin and Zhou, Jun and Gu, Lin},
    title     = {{InsTaG}: Learning Personalized {3D} Talking Head from Few-Second Video},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10690--10700},
}
Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning: Fan Lu,

Wei Wu,

Kecheng Zheng,

Shuailei Ma,

Biao Gong,

Jiawei Liu,

Wei Zhai,

Yang Cao,

Yujun Shen,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
    author    = {Lu, Fan and Wu, Wei and Zheng, Kecheng and Ma, Shuailei and Gong, Biao and Liu, Jiawei and Zhai, Wei and Cao, Yang and Shen, Yujun and Zha, Zheng-Jun},
    title     = {Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19618--19627},
}
Rotation-Equivariant Self-Supervised Method in Image Denoising: Hanze Liu,

Jiahong Fu,

Qi Xie,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Hanze and Fu, Jiahong and Xie, Qi and Meng, Deyu},
    title     = {Rotation-Equivariant Self-Supervised Method in Image Denoising},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12720--12730},
}
FlashSloth: Lightning Multimodal Large Language Models via Embedded Visual Compression: Bo Tong,

Bokai Lai,

Yiyi Zhou,

Gen Luo,

Yunhang Shen,

Ke Li,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_CVPR,
    author    = {Tong, Bo and Lai, Bokai and Zhou, Yiyi and Luo, Gen and Shen, Yunhang and Li, Ke and Sun, Xiaoshuai and Ji, Rongrong},
    title     = {{FlashSloth}: Lightning Multimodal Large Language Models via Embedded Visual Compression},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14570--14581},
}
T2SG: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving: Changsheng Lv,

Mengshi Qi,

Liang Liu,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
    author    = {Lv, Changsheng and Qi, Mengshi and Liu, Liang and Ma, Huadong},
    title     = {{T2SG}: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17197--17206},
}
RealEdit: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations: Peter Sushko,

Ayana Bharadwaj,

Zhi Yang Lim,

Vasily Ilin,

Ben Caffee,

Dongping Chen,

Mohammadreza Salehi,

Cheng-Yu Hsieh,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sushko_2025_CVPR,
    author    = {Sushko, Peter and Bharadwaj, Ayana and Lim, Zhi Yang and Ilin, Vasily and Caffee, Ben and Chen, Dongping and Salehi, Mohammadreza and Hsieh, Cheng-Yu and Krishna, Ranjay},
    title     = {{RealEdit}: {Reddit} Edits As a Large-scale Empirical Dataset for Image Transformations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13403--13413},
}
VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step: Hanyang Wang,

Fangfu Liu,

Jiawei Chi,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Hanyang and Liu, Fangfu and Chi, Jiawei and Duan, Yueqi},
    title     = {{VideoScene}: Distilling Video Diffusion Model to Generate {3D} Scenes in One Step},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16475--16485},
}
3D-HGS: 3D Half-Gaussian Splatting: Haolin Li,

Jinyang Liu,

Mario Sznaier,

Octavia Camps; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Haolin and Liu, Jinyang and Sznaier, Mario and Camps, Octavia},
    title     = {{3D-HGS}: {3D} {Half-Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10996--11005},
}
Scale Efficient Training for Large Datasets: Qing Zhou,

Junyu Gao,

Qi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Qing and Gao, Junyu and Wang, Qi},
    title     = {Scale Efficient Training for Large Datasets},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {20458--20467},
}
Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal: Haonan An,

Guang Hua,

Zhengru Fang,

Guowen Xu,

Susanto Rahardja,

Yuguang Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
    author    = {An, Haonan and Hua, Guang and Fang, Zhengru and Xu, Guowen and Rahardja, Susanto and Fang, Yuguang},
    title     = {Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13424--13433},
}
Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation: Xinyu Zhao,

Jun Xie,

Shengzhe Chen,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Xinyu and Xie, Jun and Chen, Shengzhe and Liu, Jun},
    title     = {Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14068--14077},
}
Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation: Zelin Peng,

Zhengqin Xu,

Zhilin Zeng,

Yu Huang,

Yaoming Wang,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Huang, Yu and Wang, Yaoming and Shen, Wei},
    title     = {Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15009--15020},
}
Relative Pose Estimation through Affine Corrections of Monocular Depth Priors: Yifan Yu,

Shaohui Liu,

Rémi Pautrat,

Marc Pollefeys,

Viktor Larsson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Yifan and Liu, Shaohui and Pautrat, R{\'e}mi and Pollefeys, Marc and Larsson, Viktor},
    title     = {Relative Pose Estimation through Affine Corrections of Monocular Depth Priors},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16706--16716},
}
Zero-1-to-A: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion: Zhenglin Zhou,

Fan Ma,

Hehe Fan,

Tat-Seng Chua; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Zhenglin and Ma, Fan and Fan, Hehe and Chua, Tat-Seng},
    title     = {{Zero-1-to-A}: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {15941--15952},
}
Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World 3D Object Recognition: Khanh Nguyen,

Ghulam Mubashar Hassan,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
    author    = {Nguyen, Khanh and Hassan, Ghulam Mubashar and Mian, Ajmal},
    title     = {Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World {3D} Object Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16965--16975},
}
Conical Visual Concentration for Efficient Large Vision-Language Models: Long Xing,

Qidong Huang,

Xiaoyi Dong,

Jiajie Lu,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Conghui He,

Jiaqi Wang,

Feng Wu,

Dahua Lin; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
    author    = {Xing, Long and Huang, Qidong and Dong, Xiaoyi and Lu, Jiajie and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi and Wu, Feng and Lin, Dahua},
    title     = {Conical Visual Concentration for Efficient Large Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {14593--14603},
}
Foundations of the Theory of Performance-Based Ranking: Sébastien Piérard,

Anaïs Halin,

Anthony Cioppa,

Adrien Deliege,

Marc Van Droogenbroeck; [pdf] [supp]
[bibtex]
@InProceedings{Pierard_2025_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Halin, Ana{\"\i}s and Cioppa, Anthony and Deliege, Adrien and Van Droogenbroeck, Marc},
  title     = {Foundations of the Theory of Performance-Based Ranking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14293--14302},
}
BIGS: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via 3D Gaussian Splatting: Jeongwan On,

Kyeonghwan Gwak,

Gunyoung Kang,

Junuk Cha,

Soohyun Hwang,

Hyein Hwang,

Seungryul Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{On_2025_CVPR,
  author    = {On, Jeongwan and Gwak, Kyeonghwan and Kang, Gunyoung and Cha, Junuk and Hwang, Soohyun and Hwang, Hyein and Baek, Seungryul},
  title     = {{BIGS}: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17437--17447},
}
Frequency-Biased Synergistic Design for Image Compression and Compensation: Jiaming Liu,

Qi Zheng,

Zihao Liu,

Yilian Zhong,

Peiye Liu,

Tao Liu,

Shusong Xu,

Yanheng Lu,

Sicheng Li,

Dimin Niu,

Yibo Fan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiaming and Zheng, Qi and Liu, Zihao and Zhong, Yilian and Liu, Peiye and Liu, Tao and Xu, Shusong and Lu, Yanheng and Li, Sicheng and Niu, Dimin and Fan, Yibo},
  title     = {Frequency-Biased Synergistic Design for Image Compression and Compensation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12820--12829},
}
Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering: Cheng Sun,

Jaesung Choe,

Charles Loop,

Wei-Chiu Ma,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Cheng and Choe, Jaesung and Loop, Charles and Ma, Wei-Chiu and Wang, Yu-Chiang Frank},
  title     = {Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16187--16196},
}
MambaIC: State Space Models for High-Performance Learned Image Compression: Fanhu Zeng,

Hao Tang,

Yihua Shao,

Siyu Chen,

Ling Shao,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Fanhu and Tang, Hao and Shao, Yihua and Chen, Siyu and Shao, Ling and Wang, Yan},
  title     = {{MambaIC}: State Space Models for High-Performance Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18041--18050},
}
Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting: Jinbo Yan,

Rui Peng,

Zhiyan Wang,

Luyang Tang,

Jiayu Yang,

Jie Liang,

Jiahao Wu,

Ronggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Jinbo and Peng, Rui and Wang, Zhiyan and Tang, Luyang and Yang, Jiayu and Liang, Jie and Wu, Jiahao and Wang, Ronggang},
  title     = {Instant {Gaussian} Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16520--16531},
}
Locality-Aware Zero-Shot Human-Object Interaction Detection: Sanghyun Kim,

Deunsol Jung,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sanghyun and Jung, Deunsol and Cho, Minsu},
  title     = {Locality-Aware Zero-Shot Human-Object Interaction Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20190--20200},
}
Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation: Yu Qi,

Yuanchen Ju,

Tianming Wei,

Chi Chu,

Lawson L.S. Wong,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Yu and Ju, Yuanchen and Wei, Tianming and Chu, Chi and Wong, Lawson L. S. and Xu, Huazhe},
  title     = {Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17383--17393},
}
SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion: Xiyue Guo,

Jiarui Hu,

Junjie Hu,

Hujun Bao,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Xiyue and Hu, Jiarui and Hu, Junjie and Bao, Hujun and Zhang, Guofeng},
  title     = {{SGFormer}: Satellite-Ground Fusion for {3D} Semantic Scene Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11929--11938},
}
Random Conditioning for Diffusion Model Compression with Distillation: Dohyun Kim,

Sehwan Park,

Geonhee Han,

Seung Wook Kim,

Paul Hongsuck Seo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dohyun and Park, Sehwan and Han, Geonhee and Kim, Seung Wook and Seo, Paul Hongsuck},
  title     = {Random Conditioning for Diffusion Model Compression with Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18607--18618},
}
Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation: Qitong Yang,

Mingtao Feng,

Zijie Wu,

Weisheng Dong,

Fangfang Wu,

Yaonan Wang,

Ajmal Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Dong, Weisheng and Wu, Fangfang and Wang, Yaonan and Mian, Ajmal},
  title     = {Hierarchical {Gaussian} Mixture Model Splatting for Efficient and Part Controllable {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11104--11114},
}
Heterogeneous Skeleton-Based Action Representation Learning: Hongsong Wang,

Xiaoyan Ma,

Jidong Kuang,

Jie Gui; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongsong and Ma, Xiaoyan and Kuang, Jidong and Gui, Jie},
  title     = {Heterogeneous Skeleton-Based Action Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19154--19164},
}
AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes: Andrea Porfiri Dal Cin,

Georgi Dikov,

Jihong Ju,

Mohsen Ghafoorian; [pdf] [supp]
[bibtex]
@InProceedings{Cin_2025_CVPR,
  author    = {Porfiri Dal Cin, Andrea and Dikov, Georgi and Ju, Jihong and Ghafoorian, Mohsen},
  title     = {{AnyMap}: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16674--16684},
}
Language Guided Concept Bottleneck Models for Interpretable Continual Learning: Lu Yu,

Haoyu Han,

Zhe Tao,

Hantao Yao,

Changsheng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Lu and Han, Haoyu and Tao, Zhe and Yao, Hantao and Xu, Changsheng},
  title     = {Language Guided Concept Bottleneck Models for Interpretable Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14976--14986},
}
Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model: Yingying Fan,

Quanwei Yang,

Kaisiyuan Wang,

Hang Zhou,

Yingying Li,

Haocheng Feng,

Errui Ding,

Yu Wu,

Jingdong Wang; [pdf]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Yingying and Yang, Quanwei and Wang, Kaisiyuan and Zhou, Hang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wu, Yu and Wang, Jingdong},
  title     = {{Re-HOLD}: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17550--17560},
}
Odd-One-Out: Anomaly Detection by Comparing with Neighbors: Ankan Bhunia,

Changjian Li,

Hakan Bilen; [pdf] [supp]
[bibtex]
@InProceedings{Bhunia_2025_CVPR,
  author    = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan},
  title     = {Odd-One-Out: Anomaly Detection by Comparing with Neighbors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20395--20404},
}
D^3CTTA: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation: Jichun Zhao,

Haiyong Jiang,

Haoxuan Song,

Jun Xiao,

Dong Gong; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jichun and Jiang, Haiyong and Song, Haoxuan and Xiao, Jun and Gong, Dong},
  title     = {{D$^3$CTTA}: Domain-Dependent Decorrelation for Continual Test-Time Adaption of {3D} {LiDAR} Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11864--11874},
}
A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training: Kai Wang,

Mingjia Shi,

Yukun Zhou,

Zekai Li,

Zhihang Yuan,

Yuzhang Shang,

Xiaojiang Peng,

Hanwang Zhang,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kai and Shi, Mingjia and Zhou, Yukun and Li, Zekai and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Zhang, Hanwang and You, Yang},
  title     = {A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12934--12944},
}
Empowering LLMs to Understand and Generate Complex Vector Graphics: Ximing Xing,

Juncheng Hu,

Guotao Liang,

Jing Zhang,

Dong Xu,

Qian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Ximing and Hu, Juncheng and Liang, Guotao and Zhang, Jing and Xu, Dong and Yu, Qian},
  title     = {Empowering {LLMs} to Understand and Generate Complex Vector Graphics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19487--19497},
}
PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding: Hongjia Zhai,

Hai Li,

Zhenzhe Li,

Xiaokun Pan,

Yijia He,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_CVPR,
  author    = {Zhai, Hongjia and Li, Hai and Li, Zhenzhe and Pan, Xiaokun and He, Yijia and Zhang, Guofeng},
  title     = {{PanoGS}: {Gaussian}-based Panoptic Segmentation for {3D} Open Vocabulary Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14114--14124},
}
MLVU: Benchmarking Multi-task Long Video Understanding: Junjie Zhou,

Yan Shu,

Bo Zhao,

Boya Wu,

Zhengyang Liang,

Shitao Xiao,

Minghao Qin,

Xi Yang,

Yongping Xiong,

Bo Zhang,

Tiejun Huang,

Zheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Shu, Yan and Zhao, Bo and Wu, Boya and Liang, Zhengyang and Xiao, Shitao and Qin, Minghao and Yang, Xi and Xiong, Yongping and Zhang, Bo and Huang, Tiejun and Liu, Zheng},
  title     = {{MLVU}: Benchmarking Multi-task Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13691--13701},
}
Recovering Dynamic 3D Sketches from Videos: Jaeah Lee,

Changwoon Choi,

Young Min Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jaeah and Choi, Changwoon and Kim, Young Min and Park, Jaesik},
  title     = {Recovering Dynamic {3D} Sketches from Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12423--12432},
}
EigenGS Representation: From Eigenspace to Gaussian Image Space: Lo-Wei Tai,

Ching-En Li,

Cheng-Lin Chen,

Chih-Jung Tsai,

Hwann-Tzong Chen,

Tyng-Luh Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tai_2025_CVPR,
  author    = {Tai, Lo-Wei and Li, Ching-En and Chen, Cheng-Lin and Tsai, Chih-Jung and Chen, Hwann-Tzong and Liu, Tyng-Luh},
  title     = {{EigenGS} Representation: From Eigenspace to {Gaussian} Image Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13487--13496},
}
MaSS13K: A Matting-level Semantic Segmentation Benchmark: Chenxi Xie,

Minghan Li,

Hui Zeng,

Jun Luo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Chenxi and Li, Minghan and Zeng, Hui and Luo, Jun and Zhang, Lei},
  title     = {{MaSS13K}: A Matting-level Semantic Segmentation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14046--14056},
}
Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild: Wei Liu,

Yufei Chen,

Xiaodong Yue; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Wei and Chen, Yufei and Yue, Xiaodong},
  title     = {Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15508--15517},
}
ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models: Heng Yin,

Yuqiang Ren,

Ke Yan,

Shouhong Ding,

Yongtao Hao; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Heng and Ren, Yuqiang and Yan, Ke and Ding, Shouhong and Hao, Yongtao},
  title     = {{ROD-MLLM}: Towards More Reliable Object Detection in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14358--14368},
}
nnWNet: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark: Yanfeng Zhou,

Lingrui Li,

Le Lu,

Minfeng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yanfeng and Li, Lingrui and Lu, Le and Xu, Minfeng},
  title     = {{nnWNet}: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20852--20862},
}
VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment: Darshana Saravanan,

Varun Gupta,

Darshan Singh,

Zeeshan Khan,

Vineet Gandhi,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saravanan_2025_CVPR,
  author    = {Saravanan, Darshana and Gupta, Varun and Singh, Darshan and Khan, Zeeshan and Gandhi, Vineet and Tapaswi, Makarand},
  title     = {{VELOCITI}: Benchmarking Video-Language Compositional Reasoning with Strict Entailment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18914--18924},
}
Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label 3D Scene Attacks: Daizong Liu,

Wei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Daizong and Hu, Wei},
  title     = {Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label {3D} Scene Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11886--11897},
}
Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning (PEFT) in Visual Recognition: Zheda Mai,

Ping Zhang,

Cheng-Hao Tu,

Hong-You Chen,

Quang-Huy Nguyen,

Li Zhang,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2025_CVPR,
  author    = {Mai, Zheda and Zhang, Ping and Tu, Cheng-Hao and Chen, Hong-You and Nguyen, Quang-Huy and Zhang, Li and Chao, Wei-Lun},
  title     = {Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning ({PEFT}) in Visual Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14845--14857},
}
Pippo: High-Resolution Multi-View Humans from a Single Image: Yash Kant,

Ethan Weber,

Jin Kyu Kim,

Rawal Khirodkar,

Su Zhaoen,

Julieta Martinez,

Igor Gilitschenski,

Shunsuke Saito,

Timur Bagautdinov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kant_2025_CVPR,
  author    = {Kant, Yash and Weber, Ethan and Kim, Jin Kyu and Khirodkar, Rawal and Zhaoen, Su and Martinez, Julieta and Gilitschenski, Igor and Saito, Shunsuke and Bagautdinov, Timur},
  title     = {Pippo: High-Resolution Multi-View Humans from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16418--16429},
}
H2ST: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection: Yuhang Liu,

Wenjie Zhao,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuhang and Zhao, Wenjie and Guo, Yunhui},
  title     = {{H2ST}: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15413--15423},
}
MoVE-KD: Knowledge Distillation for VLMs with Mixture of Visual Encoders: Jiajun Cao,

Yuan Zhang,

Tao Huang,

Ming Lu,

Qizhe Zhang,

Ruichuan An,

Ningning Ma,

Shanghang Zhang; [pdf]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Jiajun and Zhang, Yuan and Huang, Tao and Lu, Ming and Zhang, Qizhe and An, Ruichuan and Ma, Ningning and Zhang, Shanghang},
  title     = {{MoVE-KD}: Knowledge Distillation for {VLMs} with Mixture of Visual Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19846--19856},
}
CamFreeDiff: Camera-free Image to Panorama Generation with Diffusion Model: Xiaoding Yuan,

Shitao Tang,

Kejie Li,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Xiaoding and Tang, Shitao and Li, Kejie and Wang, Peng},
  title     = {{CamFreeDiff}: Camera-free Image to Panorama Generation with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16408--16417},
}
Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration: Yuxuan Gu,

Haoxuan Wang,

Pengyang Ling,

Zhixiang Wei,

Huaian Chen,

Yi Jin,

Enhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yuxuan and Wang, Haoxuan and Ling, Pengyang and Wei, Zhixiang and Chen, Huaian and Jin, Yi and Chen, Enhong},
  title     = {Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16071--16080},
}
FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs: Mothilal Asokan,

Kebin Wu,

Fatima Albreiki; [pdf] [supp]
[bibtex]
@InProceedings{Asokan_2025_CVPR,
  author    = {Asokan, Mothilal and Wu, Kebin and Albreiki, Fatima},
  title     = {{FineLIP}: Extending {CLIP}'s Reach via Fine-Grained Alignment with Longer Text Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14495--14504},
}
Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation: Yuying Ge,

Yizhuo Li,

Yixiao Ge,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Yuying and Li, Yizhuo and Ge, Yixiao and Shan, Ying},
  title     = {Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13606--13617},
}
Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models: Jiacong Xu,

Shao-Yuan Lo,

Bardia Safaei,

Vishal M. Patel,

Isht Dwivedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiacong and Lo, Shao-Yuan and Safaei, Bardia and Patel, Vishal M. and Dwivedi, Isht},
  title     = {Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20370--20382},
}
Video-Guided Foley Sound Generation with Multimodal Controls: Ziyang Chen,

Prem Seetharaman,

Bryan Russell,

Oriol Nieto,

David Bourgin,

Andrew Owens,

Justin Salamon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ziyang and Seetharaman, Prem and Russell, Bryan and Nieto, Oriol and Bourgin, David and Owens, Andrew and Salamon, Justin},
  title     = {Video-Guided {Foley} Sound Generation with Multimodal Controls},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18770--18781},
}
F^3OCUS - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics: Pramit Saha,

Felix Wagner,

Divyanshu Mishra,

Can Peng,

Anshul Thakur,

David A. Clifton,

Konstantinos Kamnitsas,

J. Alison Noble; [pdf] [supp]
[bibtex]
@InProceedings{Saha_2025_CVPR,
  author    = {Saha, Pramit and Wagner, Felix and Mishra, Divyanshu and Peng, Can and Thakur, Anshul and Clifton, David A. and Kamnitsas, Konstantinos and Noble, J. Alison},
  title     = {{F$^3$OCUS} - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20006--20017},
}
3D Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation: Gyeongrok Oh,

Sungjune Kim,

Heeju Ko,

Hyung-gun Chi,

Jinkyu Kim,

Dongwook Lee,

Daehyun Ji,

Sungjoon Choi,

Sujin Jang,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2025_CVPR,
  author    = {Oh, Gyeongrok and Kim, Sungjune and Ko, Heeju and Chi, Hyung-gun and Kim, Jinkyu and Lee, Dongwook and Ji, Daehyun and Choi, Sungjoon and Jang, Sujin and Kim, Sangpil},
  title     = {{3D} Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17134--17144},
}
Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?: Yuan-Hong Liao,

Rafid Mahmood,

Sanja Fidler,

David Acuna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Yuan-Hong and Mahmood, Rafid and Fidler, Sanja and Acuna, David},
  title     = {Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14667--14678},
}
g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks: Zihan Wang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zihan and Lee, Gim Hee},
  title     = {{g3D-LF}: Generalizable {3D}-Language Feature Fields for Embodied Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14191--14202},
}
UniReal: Universal Image Generation and Editing via Learning Real-world Dynamics: Xi Chen,

Zhifei Zhang,

He Zhang,

Yuqian Zhou,

Soo Ye Kim,

Qing Liu,

Yijun Li,

Jianming Zhang,

Nanxuan Zhao,

Yilin Wang,

Hui Ding,

Zhe Lin,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xi and Zhang, Zhifei and Zhang, He and Zhou, Yuqian and Kim, Soo Ye and Liu, Qing and Li, Yijun and Zhang, Jianming and Zhao, Nanxuan and Wang, Yilin and Ding, Hui and Lin, Zhe and Zhao, Hengshuang},
  title     = {{UniReal}: Universal Image Generation and Editing via Learning Real-world Dynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12501--12511},
}
Exploring Contextual Attribute Density in Referring Expression Counting: Zhicheng Wang,

Zhiyu Pan,

Zhan Peng,

Jian Cheng,

Liwen Xiao,

Wei Jiang,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhicheng and Pan, Zhiyu and Peng, Zhan and Cheng, Jian and Xiao, Liwen and Jiang, Wei and Cao, Zhiguo},
  title     = {Exploring Contextual Attribute Density in Referring Expression Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19587--19596},
}
SegMAN: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation: Yunxiang Fu,

Meng Lou,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Yunxiang and Lou, Meng and Yu, Yizhou},
  title     = {{SegMAN}: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19077--19087},
}
OmniFlow: Any-to-Any Generation with Multi-Modal Rectified Flows: Shufan Li,

Konstantinos Kallidromitis,

Akash Gokul,

Zichun Liao,

Yusuke Kato,

Kazuki Kozuka,

Aditya Grover; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Liao, Zichun and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya},
  title     = {{OmniFlow}: Any-to-Any Generation with Multi-Modal Rectified Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13178--13188},
}
SLAM3R: Real-Time Dense Scene Reconstruction from Monocular RGB Videos: Yuzheng Liu,

Siyan Dong,

Shuzhe Wang,

Yingda Yin,

Yanchao Yang,

Qingnan Fan,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzheng and Dong, Siyan and Wang, Shuzhe and Yin, Yingda and Yang, Yanchao and Fan, Qingnan and Chen, Baoquan},
  title     = {{SLAM3R}: Real-Time Dense Scene Reconstruction from Monocular {RGB} Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16651--16662},
}
SemGeoMo: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance: Peishan Cong,

Ziyi Wang,

Yuexin Ma,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2025_CVPR,
  author    = {Cong, Peishan and Wang, Ziyi and Ma, Yuexin and Yue, Xiangyu},
  title     = {{SemGeoMo}: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17561--17570},
}
Detecting Open World Objects via Partial Attribute Assignment: Muli Yang,

Gabriel James Goenawan,

Huaiyuan Qin,

Kai Han,

Xi Peng,

Yanhua Yang,

Hongyuan Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Muli and Goenawan, Gabriel James and Qin, Huaiyuan and Han, Kai and Peng, Xi and Yang, Yanhua and Zhu, Hongyuan},
  title     = {Detecting Open World Objects via Partial Attribute Assignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20318--20328},
}
Neural Inverse Rendering from Propagating Light: Anagh Malik,

Benjamin Attal,

Andrew Xie,

Matthew O'Toole,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Malik_2025_CVPR,
  author    = {Malik, Anagh and Attal, Benjamin and Xie, Andrew and O'Toole, Matthew and Lindell, David B.},
  title     = {Neural Inverse Rendering from Propagating Light},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10534--10544},
}
DecoupledGaussian: Object-Scene Decoupling for Physics-Based Interaction: Miaowei Wang,

Yibo Zhang,

Weiwei Xu,

Rui Ma,

Changqing Zou,

Daniel Morris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Miaowei and Zhang, Yibo and Xu, Weiwei and Ma, Rui and Zou, Changqing and Morris, Daniel},
  title     = {{DecoupledGaussian}: Object-Scene Decoupling for Physics-Based Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11361--11372},
}
DashGaussian: Optimizing 3D Gaussian Splatting in 200 Seconds: Youyu Chen,

Junjun Jiang,

Kui Jiang,

Xiao Tang,

Zhihao Li,

Xianming Liu,

Yinyu Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Youyu and Jiang, Junjun and Jiang, Kui and Tang, Xiao and Li, Zhihao and Liu, Xianming and Nie, Yinyu},
  title     = {{DashGaussian}: Optimizing {3D} {Gaussian} Splatting in 200 Seconds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11146--11155},
}
PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models: Mohamed Dhouib,

Davide Buscaldi,

Sonia Vanier,

Aymen Shabou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhouib_2025_CVPR,
  author    = {Dhouib, Mohamed and Buscaldi, Davide and Vanier, Sonia and Shabou, Aymen},
  title     = {{PACT}: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14582--14592},
}
R-SCoRe: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization: Xudong Jiang,

Fangjinhua Wang,

Silvano Galliani,

Christoph Vogel,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Xudong and Wang, Fangjinhua and Galliani, Silvano and Vogel, Christoph and Pollefeys, Marc},
  title     = {{R-SCoRe}: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11536--11546},
}
Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection: Zihao Zhang,

Aming Wu,

Yahong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihao and Wu, Aming and Han, Yahong},
  title     = {Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14225--14234},
}
OmniSplat: Taming Feed-Forward 3D Gaussian Splatting for Omnidirectional Images with Editable Capabilities: Suyoung Lee,

Jaeyoung Chung,

Kihoon Kim,

Jaeyoo Huh,

Gunhee Lee,

Minsoo Lee,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lee_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Suyoung and Chung, Jaeyoung and Kim, Kihoon and Huh, Jaeyoo and Lee, Gunhee and Lee, Minsoo and Lee, Kyoung Mu},
  title     = {{OmniSplat}: Taming Feed-Forward {3D} {Gaussian} Splatting for Omnidirectional Images with Editable Capabilities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16356--16365},
}
UniVAD: A Training-free Unified Model for Few-shot Visual Anomaly Detection: Zhaopeng Gu,

Bingke Zhu,

Guibo Zhu,

Yingying Chen,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Zhaopeng and Zhu, Bingke and Zhu, Guibo and Chen, Yingying and Tang, Ming and Wang, Jinqiao},
  title     = {{UniVAD}: A Training-free Unified Model for Few-shot Visual Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15194--15203},
}
Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios: Hang Shao,

Lei Luo,

Jianjun Qian,

Mengkai Yan,

Shuo Chen,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Shao_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Hang and Luo, Lei and Qian, Jianjun and Yan, Mengkai and Chen, Shuo and Yang, Jian},
  title     = {Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10858--10867},
}
Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for RGBD Datasets: Muhammad Abdullah Jamal,

Omid Mohareri; [pdf] [supp]
[bibtex]
@InProceedings{Jamal_2025_CVPR,
  author    = {Jamal, Muhammad Abdullah and Mohareri, Omid},
  title     = {Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for {RGBD} Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17947--17957},
}
Font-Agent: Enhancing Font Understanding with Large Language Models: Yingxin Lai,

Cuijie Xu,

Haitian Shi,

Guoqing Yang,

Xiaoning Li,

Zhiming Luo,

Shaozi Li; [pdf]
[bibtex]
% NOTE(review): citation key Lai_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yingxin and Xu, Cuijie and Shi, Haitian and Yang, Guoqing and Li, Xiaoning and Luo, Zhiming and Li, Shaozi},
  title     = {{Font-Agent}: Enhancing Font Understanding with Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19670--19680},
}
Secret Lies in Color: Enhancing AI-Generated Images Detection with Color Distribution Analysis: Zexi Jia,

Chuanwei Huang,

Yeshuang Zhu,

Hongyan Fei,

Xiaoyue Duan,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Duan, Xiaoyue and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {Secret Lies in Color: Enhancing {AI}-Generated Images Detection with Color Distribution Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13445--13454},
}
Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary 3D Scene Understanding: Jinlong Li,

Cristiano Saltori,

Fabio Poiesi,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jinlong and Saltori, Cristiano and Poiesi, Fabio and Sebe, Nicu},
  title     = {Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19390--19400},
}
SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation: Hao Du,

Bo Wu,

Yan Lu,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Du_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Du_2025_CVPR,
  author    = {Du, Hao and Wu, Bo and Lu, Yan and Mao, Zhendong},
  title     = {{SVLTA}: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13798--13809},
}
Mixture of Submodules for Domain Adaptive Person Search: Minsu Kim,

Seungryong Kim,

Kwanghoon Sohn; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Minsu and Kim, Seungryong and Sohn, Kwanghoon},
  title     = {Mixture of Submodules for Domain Adaptive Person Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13990--14001},
}
SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation: Duc-Hai Pham,

Tung Do,

Phong Nguyen,

Binh-Son Hua,

Khoi Nguyen,

Rang Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2025_CVPR,
  author    = {Pham, Duc-Hai and Do, Tung and Nguyen, Phong and Hua, Binh-Son and Nguyen, Khoi and Nguyen, Rang},
  title     = {{SharpDepth}: Sharpening Metric Depth Predictions Using Diffusion Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17060--17069},
}
EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events: Shuoyan Wei,

Feng Li,

Shengeng Tang,

Yao Zhao,

Huihui Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Shuoyan and Li, Feng and Tang, Shengeng and Zhao, Yao and Bai, Huihui},
  title     = {{EvEnhancer}: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17755--17766},
}
Seeing A 3D World in A Grain of Sand: Yufan Zhang,

Yu Ji,

Yu Guo,

Jinwei Ye; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yufan and Ji, Yu and Guo, Yu and Ye, Jinwei},
  title     = {Seeing A {3D} World in A Grain of Sand},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11187--11196},
}
MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation: Yuxiang Fu,

Qi Yan,

Lele Wang,

Ke Li,

Renjie Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Yuxiang and Yan, Qi and Wang, Lele and Li, Ke and Liao, Renjie},
  title     = {{MoFlow}: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17282--17293},
}
Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval: Yuanmin Tang,

Jue Zhang,

Xiaoting Qin,

Jing Yu,

Gaopeng Gou,

Gang Xiong,

Qingwei Lin,

Saravan Rajmohan,

Dongmei Zhang,

Qi Wu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Zhang, Jue and Qin, Xiaoting and Yu, Jing and Gou, Gaopeng and Xiong, Gang and Lin, Qingwei and Rajmohan, Saravan and Zhang, Dongmei and Wu, Qi},
  title     = {Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14400--14410},
}
UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping: Wenbo Wang,

Fangyun Wei,

Lei Zhou,

Xi Chen,

Lin Luo,

Xiaohan Yi,

Yizhong Zhang,

Yaobo Liang,

Chang Xu,

Yan Lu,

Jiaolong Yang,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Wenbo and Wei, Fangyun and Zhou, Lei and Chen, Xi and Luo, Lin and Yi, Xiaohan and Zhang, Yizhong and Liang, Yaobo and Xu, Chang and Lu, Yan and Yang, Jiaolong and Guo, Baining},
  title     = {{UniGraspTransformer}: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12199--12208},
}
Apollo: An Exploration of Video Understanding in Large Multimodal Models: Orr Zohar,

Xiaohan Wang,

Yann Dubois,

Nikhil Mehta,

Tong Xiao,

Philippe Hansen-Estruch,

Licheng Yu,

Xiaofang Wang,

Felix Juefei-Xu,

Ning Zhang,

Serena Yeung-Levy,

Xide Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zohar_2025_CVPR,
  author    = {Zohar, Orr and Wang, Xiaohan and Dubois, Yann and Mehta, Nikhil and Xiao, Tong and Hansen-Estruch, Philippe and Yu, Licheng and Wang, Xiaofang and Juefei-Xu, Felix and Zhang, Ning and Yeung-Levy, Serena and Xia, Xide},
  title     = {Apollo: An Exploration of Video Understanding in Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18891--18901},
}
Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves: Shihan Wu,

Ji Zhang,

Pengpeng Zeng,

Lianli Gao,

Jingkuan Song,

Heng Tao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shihan and Zhang, Ji and Zeng, Pengpeng and Gao, Lianli and Song, Jingkuan and Shen, Heng Tao},
  title     = {Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14723--14732},
}
PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation: Qihan Huang,

Long Chan,

Jinlong Liu,

Wanggui He,

Hao Jiang,

Mingli Song,

Jie Song; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Huang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Qihan and Chan, Long and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Song, Jie},
  title     = {{PatchDPO}: Patch-level {DPO} for Finetuning-free Personalized Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18369--18378},
}
MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos: Zhengqi Li,

Richard Tucker,

Forrester Cole,

Qianqian Wang,

Linyi Jin,

Vickie Ye,

Angjoo Kanazawa,

Aleksander Holynski,

Noah Snavely; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengqi and Tucker, Richard and Cole, Forrester and Wang, Qianqian and Jin, Linyi and Ye, Vickie and Kanazawa, Angjoo and Holynski, Aleksander and Snavely, Noah},
  title     = {{MegaSaM}: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10486--10496},
}
Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On: Nannan Zhang,

Yijiang Li,

Dong Du,

Zheng Chong,

Zhengwentai Sun,

Jianhao Zeng,

Yusheng Dai,

Zhengyu Xie,

Hairui Zhu,

Xiaoguang Han; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Nannan and Li, Yijiang and Du, Dong and Chong, Zheng and Sun, Zhengwentai and Zeng, Jianhao and Dai, Yusheng and Xie, Zhengyu and Zhu, Hairui and Han, Xiaoguang},
  title     = {{Robust-MVTON}: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16029--16039},
}
Identity-Preserving Text-to-Video Generation by Frequency Decomposition: Shenghai Yuan,

Jinfa Huang,

Xianyi He,

Yunyang Ge,

Yujun Shi,

Liuhan Chen,

Jiebo Luo,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Shenghai and Huang, Jinfa and He, Xianyi and Ge, Yunyang and Shi, Yujun and Chen, Liuhan and Luo, Jiebo and Yuan, Li},
  title     = {Identity-Preserving Text-to-Video Generation by Frequency Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12978--12988},
}
FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity: Jinxi Li,

Ziyang Song,

Siyuan Zhou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jinxi and Song, Ziyang and Zhou, Siyuan and Yang, Bo},
  title     = {{FreeGave}: {3D} Physics Learning from Dynamic Videos by {Gaussian} Velocity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12433--12443},
}
MOS: Modeling Object-Scene Associations in Generalized Category Discovery: Zhengyuan Peng,

Jinpeng Ma,

Zhimin Sun,

Ran Yi,

Haichuan Song,

Xin Tan,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Peng_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zhengyuan and Ma, Jinpeng and Sun, Zhimin and Yi, Ran and Song, Haichuan and Tan, Xin and Ma, Lizhuang},
  title     = {{MOS}: Modeling Object-Scene Associations in Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15118--15128},
}
Test-time Augmentation Improves Efficiency in Conformal Prediction: Divya Shanmugam,

Helen Lu,

Swami Sankaranarayanan,

John Guttag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shanmugam_2025_CVPR,
  author    = {Shanmugam, Divya and Lu, Helen and Sankaranarayanan, Swami and Guttag, John},
  title     = {Test-time Augmentation Improves Efficiency in Conformal Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20622--20631},
}
StoryGPT-V: Large Language Models as Consistent Story Visualizers: Xiaoqian Shen,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xiaoqian and Elhoseiny, Mohamed},
  title     = {{StoryGPT-V}: Large Language Models as Consistent Story Visualizers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13273--13283},
}
Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning: Isma Hadji,

Mehdi Noroozi,

Victor Escorcia,

Anestis Zaganidis,

Brais Martinez,

Georgios Tzimiropoulos; [pdf]
[bibtex]
@InProceedings{Hadji_2025_CVPR,
  author    = {Hadji, Isma and Noroozi, Mehdi and Escorcia, Victor and Zaganidis, Anestis and Martinez, Brais and Tzimiropoulos, Georgios},
  title     = {{Edge-SD-SR}: Low Latency and Parameter Efficient On-device Super-Resolution with {Stable Diffusion} via Bidirectional Conditioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12789--12798},
}
INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations: Yongming Zhu,

Longhao Zhang,

Zhengkun Rong,

Tianshu Hu,

Shuang Liang,

Zhipeng Ge; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhu_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yongming and Zhang, Longhao and Rong, Zhengkun and Hu, Tianshu and Liang, Shuang and Ge, Zhipeng},
  title     = {{INFP}: Audio-Driven Interactive Head Generation in Dyadic Conversations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10667--10677},
}
EVPGS: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis: Jiahe Li,

Feiyu Wang,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Ting Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahe and Wang, Feiyu and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Liu, Ting},
  title     = {{EVPGS}: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16398--16407},
}
GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding: Yawen Shao,

Wei Zhai,

Yuhang Yang,

Hongchen Luo,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Shao_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Yawen and Zhai, Wei and Yang, Yuhang and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun},
  title     = {{GREAT}: Geometry-Intention Collaborative Inference for Open-Vocabulary {3D} Object Affordance Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17326--17336},
}
Adapting Pre-trained 3D Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception: Baixuan Lv,

Yaohua Zha,

Tao Dai,

Xue Yuerong,

Ke Chen,

Shu-Tao Xia; [pdf] [supp]
[bibtex]
% NOTE(review): the listing names the fourth author "Xue Yuerong" while the entry stores "Yuerong, Xue"; confirm which part is the family name.
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Baixuan and Zha, Yaohua and Dai, Tao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao},
  title     = {Adapting Pre-trained {3D} Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12413--12422},
}
MASH-VLM: Mitigating Action-Scene Hallucination in Video-LLMs through Disentangled Spatial-Temporal Representations: Kyungho Bae,

Jinhyung Kim,

Sihaeng Lee,

Soonyoung Lee,

Gunhee Lee,

Jinwoo Choi; [pdf] [supp]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Kyungho and Kim, Jinhyung and Lee, Sihaeng and Lee, Soonyoung and Lee, Gunhee and Choi, Jinwoo},
  title     = {{MASH-VLM}: Mitigating Action-Scene Hallucination in {Video-LLMs} through Disentangled Spatial-Temporal Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13744--13753},
}
UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing: Yung-Hsuan Lai,

Janek Ebbers,

Yu-Chiang Frank Wang,

François Germain,

Michael Jeffrey Jones,

Moitreya Chatterjee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lai_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, Fran{\c{c}}ois and Jones, Michael Jeffrey and Chatterjee, Moitreya},
  title     = {{UWAV}: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13561--13570},
}
Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning: Jing Zhu,

Yuhang Zhou,

Shengyi Qian,

Zhongmou He,

Tong Zhao,

Neil Shah,

Danai Koutra; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Zhu_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jing and Zhou, Yuhang and Qian, Shengyi and He, Zhongmou and Zhao, Tong and Shah, Neil and Koutra, Danai},
  title     = {Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14215--14224},
}
FirePlace: Geometric Refinements of LLM Common Sense Reasoning for 3D Object Placement: Ian Huang,

Yanan Bao,

Karen Truong,

Howard Zhou,

Cordelia Schmid,

Leonidas Guibas,

Alireza Fathi; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ian and Bao, Yanan and Truong, Karen and Zhou, Howard and Schmid, Cordelia and Guibas, Leonidas and Fathi, Alireza},
  title     = {{FirePlace}: Geometric Refinements of {LLM} Common Sense Reasoning for {3D} Object Placement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13466--13476},
}
End-to-End Implicit Neural Representations for Classification: Alexander Gielisse,

Jan van Gemert; [pdf] [arXiv]
[bibtex]
@InProceedings{Gielisse_2025_CVPR,
  author    = {Gielisse, Alexander and van Gemert, Jan},
  title     = {End-to-End Implicit Neural Representations for Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18728--18737},
}
UNICL-SAM: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery: Dianmo Sheng,

Dongdong Chen,

Zhentao Tan,

Qiankun Liu,

Qi Chu,

Tao Gong,

Bin Liu,

Jing Han,

Wenbin Tu,

Shengwei Xu,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Gong, Tao and Liu, Bin and Han, Jing and Tu, Wenbin and Xu, Shengwei and Yu, Nenghai},
  title     = {{UNICL-SAM}: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20201--20211},
}
Layered Motion Fusion: Lifting Motion Segmentation to 3D in Egocentric Videos: Vadim Tschernezki,

Diane Larlus,

Iro Laina,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tschernezki_2025_CVPR,
  author    = {Tschernezki, Vadim and Larlus, Diane and Laina, Iro and Vedaldi, Andrea},
  title     = {Layered Motion Fusion: Lifting Motion Segmentation to {3D} in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17637--17648},
}
Diffusion Self-Distillation for Zero-Shot Customized Image Generation: Shengqu Cai,

Eric Ryan Chan,

Yunzhi Zhang,

Leonidas Guibas,

Jiajun Wu,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Shengqu and Chan, Eric Ryan and Zhang, Yunzhi and Guibas, Leonidas and Wu, Jiajun and Wetzstein, Gordon},
  title     = {Diffusion Self-Distillation for Zero-Shot Customized Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18434--18443},
}
Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model: Leheng Zhang,

Weiyi You,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Leheng and You, Weiyi and Shi, Kexuan and Gu, Shuhang},
  title     = {Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17980--17989},
}
Towards Human-Understandable Multi-Dimensional Concept Discovery: Arne Grobrügge,

Niklas Kühl,

Gerhard Satzger,

Philipp Spitzer; [pdf] [supp]
[bibtex]
@InProceedings{Grobrugge_2025_CVPR,
  author    = {Grobr{\"u}gge, Arne and K{\"u}hl, Niklas and Satzger, Gerhard and Spitzer, Philipp},
  title     = {Towards Human-Understandable Multi-Dimensional Concept Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20018--20027},
}
ConText-CIR: Learning from Concepts in Text for Composed Image Retrieval: Eric Xing,

Pranavi Kolouju,

Robert Pless,

Abby Stylianou,

Nathan Jacobs; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Eric and Kolouju, Pranavi and Pless, Robert and Stylianou, Abby and Jacobs, Nathan},
  title     = {{ConText-CIR}: Learning from Concepts in Text for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19638--19648},
}
Perturb-and-Revise: Flexible 3D Editing with Generative Trajectories: Susung Hong,

Johanna Karras,

Ricardo Martin-Brualla,

Ira Kemelmacher-Shlizerman; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Susung and Karras, Johanna and Martin-Brualla, Ricardo and Kemelmacher-Shlizerman, Ira},
  title     = {Perturb-and-Revise: Flexible {3D} Editing with Generative Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16293--16303},
}
Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval: Yushuai Sun,

Zikun Zhou,

Dongmei Jiang,

Yaowei Wang,

Jun Yu,

Guangming Lu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yushuai and Zhou, Zikun and Jiang, Dongmei and Wang, Yaowei and Yu, Jun and Lu, Guangming and Pei, Wenjie},
  title     = {Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15255--15264},
}
LoRACLR: Contrastive Adaptation for Customization of Diffusion Models: Enis Simsar,

Thomas Hofmann,

Federico Tombari,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simsar_2025_CVPR,
  author    = {Simsar, Enis and Hofmann, Thomas and Tombari, Federico and Yanardag, Pinar},
  title     = {{LoRACLR}: Contrastive Adaptation for Customization of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13189--13198},
}
Opportunistic Single-Photon Time of Flight: Sotiris Nousias,

Mian Wei,

Howard Xiao,

Maxx Wu,

Shahmeer Athar,

Kevin J. Wang,

Anagh Malik,

David A. Barmherzig,

David B. Lindell,

Kyros N. Kutulakos; [pdf] [supp]
[bibtex]
@InProceedings{Nousias_2025_CVPR,
  author    = {Nousias, Sotiris and Wei, Mian and Xiao, Howard and Wu, Maxx and Athar, Shahmeer and Wang, Kevin J. and Malik, Anagh and Barmherzig, David A. and Lindell, David B. and Kutulakos, Kyros N.},
  title     = {Opportunistic Single-Photon Time of Flight},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15852--15862},
}
Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought: Yunze Man,

De-An Huang,

Guilin Liu,

Shiwei Sheng,

Shilong Liu,

Liang-Yan Gui,

Jan Kautz,

Yu-Xiong Wang,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2025_CVPR,
  author    = {Man, Yunze and Huang, De-An and Liu, Guilin and Sheng, Shiwei and Liu, Shilong and Gui, Liang-Yan and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14268--14280},
}
Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations: Jungin Park,

Jiyoung Lee,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jungin and Lee, Jiyoung and Sohn, Kwanghoon},
  title     = {Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13661--13670},
}
Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis: Tongtong Su,

Chengyu Wang,

Bingyan Liu,

Jun Huang,

Dongming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Tongtong and Wang, Chengyu and Liu, Bingyan and Huang, Jun and Lu, Dongming},
  title     = {Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18209--18218},
}
Retrieving Semantics from the Deep: an RAG Solution for Gesture Synthesis: M. Hamza Mughal,

Rishabh Dabral,

Merel C.J. Scholman,

Vera Demberg,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mughal_2025_CVPR,
  author    = {Mughal, M. Hamza and Dabral, Rishabh and Scholman, Merel C. J. and Demberg, Vera and Theobalt, Christian},
  title     = {Retrieving Semantics from the Deep: an {RAG} Solution for Gesture Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16578--16588},
}
Improving Personalized Search with Regularized Low-Rank Parameter Updates: Fiona Ryan,

Josef Sivic,

Fabian Caba Heilbron,

Judy Hoffman,

James M. Rehg,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the listing names the third author "Fabian Caba Heilbron"; the entry treats "Heilbron" alone as the surname. Confirm whether the family name is "Caba Heilbron".
@InProceedings{Ryan_2025_CVPR,
  author    = {Ryan, Fiona and Sivic, Josef and Heilbron, Fabian Caba and Hoffman, Judy and Rehg, James M. and Russell, Bryan},
  title     = {Improving Personalized Search with Regularized Low-Rank Parameter Updates},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19748--19757},
}
HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis: Mengtian Li,

Jinshu Chen,

Wanquan Feng,

Bingchuan Li,

Fei Dai,

Songtao Zhao,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Mengtian and Chen, Jinshu and Feng, Wanquan and Li, Bingchuan and Dai, Fei and Zhao, Songtao and He, Qian},
  title     = {{HyperLoRA}: Parameter-Efficient Adaptive Generation for Portrait Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13114--13123},
}
EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection: Yizheng Xie,

Viktoria Ehm,

Paul Roetzer,

Nafie El Amrani,

Maolin Gao,

Florian Bernard,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yizheng and Ehm, Viktoria and Roetzer, Paul and El Amrani, Nafie and Gao, Maolin and Bernard, Florian and Cremers, Daniel},
  title     = {{EchoMatch}: Partial-to-Partial Shape Matching via Correspondence Reflection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11665--11675},
}
Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images: Jie Mei,

Chenyu Lin,

Yu Qiu,

Yaonan Wang,

Hui Zhang,

Ziyang Wang,

Dong Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Jie and Lin, Chenyu and Qiu, Yu and Wang, Yaonan and Zhang, Hui and Wang, Ziyang and Dai, Dong},
  title     = {Cross-Modal Interactive Perception Network with {Mamba} for Lung Tumor Segmentation in {PET-CT} Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15653--15662},
}
SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization: Yi Du,

Zhipeng Zhao,

Shaoshu Su,

Sharath Golluri,

Haoze Zheng,

Runmao Yao,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Du_2025_CVPR is repeated by other entries in this file; make keys unique before citing.
@InProceedings{Du_2025_CVPR,
  author    = {Du, Yi and Zhao, Zhipeng and Su, Shaoshu and Golluri, Sharath and Zheng, Haoze and Yao, Runmao and Wang, Chen},
  title     = {{SuperPC}: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16953--16964},
}
Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation: Chenggong Ni,

Fan Lyu,

Jiayao Tan,

Fuyuan Hu,

Rui Yao,

Tao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Chenggong and Lyu, Fan and Tan, Jiayao and Hu, Fuyuan and Yao, Rui and Zhou, Tao},
  title     = {Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15319--15328},
}
Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals: Changhao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Changhao},
  title     = {Generalized {Gaussian} Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11779--11788},
}
Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model: Jian Zhu,

He Wang,

Yang Xu,

Zebin Wu,

Zhihui Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jian and Wang, He and Xu, Yang and Wu, Zebin and Wei, Zhihui},
  title     = {Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17862--17871},
}
StickMotion: Generating 3D Human Motions by Drawing a Stickman: Tao Wang,

Zhihua Wu,

Qiaozhi He,

Jiaming Chu,

Ling Qian,

Yu Cheng,

Junliang Xing,

Jian Zhao,

Lei Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tao and Wu, Zhihua and He, Qiaozhi and Chu, Jiaming and Qian, Ling and Cheng, Yu and Xing, Junliang and Zhao, Jian and Jin, Lei},
  title     = {{StickMotion}: Generating {3D} Human Motions by Drawing a Stickman},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12370--12379},
}
Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework: Yi Yu,

Weizhen Han,

Libing Wu,

Bingyi Liu,

Enshu Wang,

Zhuangzhuang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Yi and Han, Weizhen and Wu, Libing and Liu, Bingyi and Wang, Enshu and Zhang, Zhuangzhuang},
  title     = {Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17229--17238},
}
Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption: Du Chen,

Tianhe Wu,

Kede Ma,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Du and Wu, Tianhe and Ma, Kede and Zhang, Lei},
  title     = {Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12742--12752},
}
Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction: Ning Ni,

Libao Zhang; [pdf]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Ning and Zhang, Libao},
  title     = {Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12690--12699},
}
Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection: Zhuo Xu,

Xiang Xiang,

Yifan Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zhuo and Xiang, Xiang and Liang, Yifan},
  title     = {Overcoming Shortcut Problem in {VLM} for Robust Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15402--15412},
}
RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics: Chan Hee Song,

Valts Blukis,

Jonathan Tremblay,

Stephen Tyree,

Yu Su,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Chan Hee and Blukis, Valts and Tremblay, Jonathan and Tyree, Stephen and Su, Yu and Birchfield, Stan},
  title     = {{RoboSpatial}: Teaching Spatial Understanding to {2D} and {3D} Vision-Language Models for Robotics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15768--15780},
}
BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering: Minye Wu,

Haizhao Dai,

Kaixin Yao,

Tinne Tuytelaars,

Jingyi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Minye and Dai, Haizhao and Yao, Kaixin and Tuytelaars, Tinne and Yu, Jingyi},
  title     = {{BG-Triangle}: {Bezier} {Gaussian} Triangle for {3D} Vectorization and Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16197--16207},
}
TKG-DM: Training-free Chroma Key Content Generation Diffusion Model: Ryugo Morita,

Stanislav Frolov,

Brian Bernhard Moser,

Takahiro Shirakawa,

Ko Watanabe,

Andreas Dengel,

Jinjia Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Morita_2025_CVPR,
  author    = {Morita, Ryugo and Frolov, Stanislav and Moser, Brian Bernhard and Shirakawa, Takahiro and Watanabe, Ko and Dengel, Andreas and Zhou, Jinjia},
  title     = {{TKG-DM}: Training-free Chroma Key Content Generation Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13031--13040},
}
Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation: Yueru Jia,

Jiaming Liu,

Sixiang Chen,

Chenyang Gu,

Zhilve Wang,

Longzan Luo,

Xiaoqi Li,

Pengwei Wang,

Zhongyuan Wang,

Renrui Zhang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Yueru and Liu, Jiaming and Chen, Sixiang and Gu, Chenyang and Wang, Zhilve and Luo, Longzan and Li, Xiaoqi and Wang, Pengwei and Wang, Zhongyuan and Zhang, Renrui and Zhang, Shanghang},
  title     = {{Lift3D} Policy: Lifting {2D} Foundation Models for Robust {3D} Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17347--17358},
}
Multi-View Pose-Agnostic Change Localization with Zero Labels: Chamuditha Jayanga Galappaththige,

Jason Lai,

Lloyd Windrim,

Donald Dansereau,

Niko Sunderhauf,

Dimity Miller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galappaththige_2025_CVPR,
  author    = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Sunderhauf, Niko and Miller, Dimity},
  title     = {Multi-View Pose-Agnostic Change Localization with Zero Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11600--11610},
}
Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition: Zhiyuan Chen,

Keyi Li,

Yifan Jia,

Le Ye,

Yufei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhiyuan and Li, Keyi and Jia, Yifan and Ye, Le and Ma, Yufei},
  title     = {Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18011--18020},
}
A Simple yet Effective Layout Token in Large Language Models for Document Understanding: Zhaoqing Zhu,

Chuwei Luo,

Zirui Shao,

Feiyu Gao,

Hangdi Xing,

Qi Zheng,

Ji Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zhaoqing and Luo, Chuwei and Shao, Zirui and Gao, Feiyu and Xing, Hangdi and Zheng, Qi and Zhang, Ji},
  title     = {A Simple yet Effective Layout Token in Large Language Models for Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14472--14482},
}
Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models: Jingfeng Yao,

Bin Yang,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Jingfeng and Yang, Bin and Wang, Xinggang},
  title     = {Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15703--15712},
}
MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention: Yuhan Wang,

Fangzhou Hong,

Shuai Yang,

Liming Jiang,

Wayne Wu,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhan and Hong, Fangzhou and Yang, Shuai and Jiang, Liming and Wu, Wayne and Loy, Chen Change},
  title     = {{MEAT}: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11297--11306},
}
Free Lunch Enhancements for Multi-modal Crowd Counting: Haoliang Meng,

Xiaopeng Hong,

Zhengqin Lai,

Miao Shang; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Haoliang and Hong, Xiaopeng and Lai, Zhengqin and Shang, Miao},
  title     = {Free Lunch Enhancements for Multi-modal Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14013--14023},
}
EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis: Sheng Miao,

Jiaxin Huang,

Dongfeng Bai,

Xu Yan,

Hongyu Zhou,

Yue Wang,

Bingbing Liu,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Sheng and Huang, Jiaxin and Bai, Dongfeng and Yan, Xu and Zhou, Hongyu and Wang, Yue and Liu, Bingbing and Geiger, Andreas and Liao, Yiyi},
  title     = {{EVolSplat}: Efficient Volume-based {Gaussian} Splatting for Urban View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11286--11296},
}
PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution: Shuangfan Zhou,

Chu Zhou,

Youwei Lyu,

Heng Guo,

Zhanyu Ma,

Boxin Shi,

Imari Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shuangfan and Zhou, Chu and Lyu, Youwei and Guo, Heng and Ma, Zhanyu and Shi, Boxin and Sato, Imari},
  title     = {{PIDSR}: Complementary Polarized Image Demosaicing and Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16081--16090},
}
MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data: Hanwen Jiang,

Zexiang Xu,

Desai Xie,

Ziwen Chen,

Haian Jin,

Fujun Luan,

Zhixin Shu,

Kai Zhang,

Sai Bi,

Xin Sun,

Jiuxiang Gu,

Qixing Huang,

Georgios Pavlakos,

Hao Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Hanwen and Xu, Zexiang and Xie, Desai and Chen, Ziwen and Jin, Haian and Luan, Fujun and Shu, Zhixin and Zhang, Kai and Bi, Sai and Sun, Xin and Gu, Jiuxiang and Huang, Qixing and Pavlakos, Georgios and Tan, Hao},
  title     = {{MegaSynth}: Scaling Up {3D} Scene Reconstruction with Synthesized Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16441--16452},
}
HandOS: 3D Hand Reconstruction in One Stage: Xingyu Chen,

Zhuheng Song,

Xiaoke Jiang,

Yaoqing Hu,

Junzhi Yu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xingyu and Song, Zhuheng and Jiang, Xiaoke and Hu, Yaoqing and Yu, Junzhi and Zhang, Lei},
  title     = {{HandOS}: {3D} Hand Reconstruction in One Stage},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17304--17314},
}
All-Day Multi-Camera Multi-Target Tracking: Huijie Fan,

Yu Qiao,

Yihao Zhen,

Tinghui Zhao,

Baojie Fan,

Qiang Wang; [pdf]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Huijie and Qiao, Yu and Zhen, Yihao and Zhao, Tinghui and Fan, Baojie and Wang, Qiang},
  title     = {All-Day Multi-Camera Multi-Target Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16892--16901},
}
EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space: Jianrong Zhang,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianrong and Fan, Hehe and Yang, Yi},
  title     = {{EnergyMoGen}: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17592--17602},
}
StarVector: Generating Scalable Vector Graphics Code from Images and Text: Juan A. Rodriguez,

Abhay Puri,

Shubham Agarwal,

Issam H. Laradji,

Pau Rodriguez,

Sai Rajeswar,

David Vazquez,

Christopher Pal,

Marco Pedersoli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rodriguez_2025_CVPR,
  author    = {Rodriguez, Juan A. and Puri, Abhay and Agarwal, Shubham and Laradji, Issam H. and Rodriguez, Pau and Rajeswar, Sai and Vazquez, David and Pal, Christopher and Pedersoli, Marco},
  title     = {{StarVector}: Generating Scalable Vector Graphics Code from Images and Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16175--16186},
}
Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics: Tahira Kazimi,

Ritika Allada,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Kazimi_2025_CVPR,
  author    = {Kazimi, Tahira and Allada, Ritika and Yanardag, Pinar},
  title     = {Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14799--14809},
}
Attention Distillation: A Unified Approach to Visual Characteristics Transfer: Yang Zhou,

Xu Gao,

Zichong Chen,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yang and Gao, Xu and Chen, Zichong and Huang, Hui},
  title     = {Attention Distillation: A Unified Approach to Visual Characteristics Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18270--18280},
}
From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing: Jingxuan Wei,

Cheng Tan,

Qi Chen,

Gaowei Wu,

Siyuan Li,

Zhangyang Gao,

Linzhuang Sun,

Bihui Yu,

Ruifeng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Jingxuan and Tan, Cheng and Chen, Qi and Wu, Gaowei and Li, Siyuan and Gao, Zhangyang and Sun, Linzhuang and Yu, Bihui and Guo, Ruifeng},
  title     = {From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13315--13325},
}
DreamRelation: Bridging Customization and Relation Generation: Qingyu Shi,

Lu Qi,

Jianzong Wu,

Jinbin Bai,

Jingbo Wang,

Yunhai Tong,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Qingyu and Qi, Lu and Wu, Jianzong and Bai, Jinbin and Wang, Jingbo and Tong, Yunhai and Li, Xiangtai},
  title     = {{DreamRelation}: Bridging Customization and Relation Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15723--15732},
}
Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction: Li Fang,

Hao Zhu,

Longlong Chen,

Fei Hu,

Long Ye,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Li and Zhu, Hao and Chen, Longlong and Hu, Fei and Ye, Long and Ma, Zhan},
  title     = {Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11217--11226},
}
TinyFusion: Diffusion Transformers Learned Shallow: Gongfan Fang,

Kunjun Li,

Xinyin Ma,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Gongfan and Li, Kunjun and Ma, Xinyin and Wang, Xinchao},
  title     = {{TinyFusion}: Diffusion Transformers Learned Shallow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18144--18154},
}
SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering: Hanxiao Sun,

Yupeng Gao,

Jin Xie,

Jian Yang,

Beibei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Hanxiao and Gao, Yupeng and Xie, Jin and Yang, Jian and Wang, Beibei},
  title     = {{SVG-IR}: Spatially-Varying {Gaussian} Splatting for Inverse Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16143--16152},
}
Scaling Mesh Generation via Compressive Tokenization: Haohan Weng,

Zibo Zhao,

Biwen Lei,

Xianghui Yang,

Jian Liu,

Zeqiang Lai,

Zhuo Chen,

Yuhong Liu,

Jie Jiang,

Chunchao Guo,

Tong Zhang,

Shenghua Gao,

C.L. Philip Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2025_CVPR,
  author    = {Weng, Haohan and Zhao, Zibo and Lei, Biwen and Yang, Xianghui and Liu, Jian and Lai, Zeqiang and Chen, Zhuo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Zhang, Tong and Gao, Shenghua and Chen, C. L. Philip},
  title     = {Scaling Mesh Generation via Compressive Tokenization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11093--11103},
}
Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging: Max Kahl,

Sebastian Stricker,

Lisa Hutschenreiter,

Florian Bernard,

Carsten Rother,

Bogdan Savchynskyy; [pdf] [supp]
[bibtex]
@InProceedings{Kahl_2025_CVPR,
  author    = {Kahl, Max and Stricker, Sebastian and Hutschenreiter, Lisa and Bernard, Florian and Rother, Carsten and Savchynskyy, Bogdan},
  title     = {Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11569--11578},
}
PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention: Weicheng Wang,

Guoli Jia,

Zhongqi Zhang,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Weicheng and Jia, Guoli and Zhang, Zhongqi and Lin, Liang and Yang, Jufeng},
  title     = {{PS-Diffusion}: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18302--18312},
}
Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models: Shuyang Hao,

Bryan Hooi,

Jun Liu,

Kai-Wei Chang,

Zi Huang,

Yujun Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Shuyang and Hooi, Bryan and Liu, Jun and Chang, Kai-Wei and Huang, Zi and Cai, Yujun},
  title     = {Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19890--19899},
}
Few-shot Implicit Function Generation via Equivariance: Suizhi Huang,

Xingyi Yang,

Hongtao Lu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Suizhi and Yang, Xingyi and Lu, Hongtao and Wang, Xinchao},
  title     = {Few-shot Implicit Function Generation via Equivariance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16262--16272},
}
vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation: Bastian Wittmann,

Yannick Wattenberg,

Tamaz Amiranashvili,

Suprosanna Shit,

Bjoern Menze; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wittmann_2025_CVPR,
  author    = {Wittmann, Bastian and Wattenberg, Yannick and Amiranashvili, Tamaz and Shit, Suprosanna and Menze, Bjoern},
  title     = {{vesselFM}: A Foundation Model for Universal {3D} Blood Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20874--20884},
}
Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization: Anubhav Jain,

Yuya Kobayashi,

Takashi Shibuya,

Yuhta Takida,

Nasir Memon,

Julian Togelius,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2025_CVPR,
  author    = {Jain, Anubhav and Kobayashi, Yuya and Shibuya, Takashi and Takida, Yuhta and Memon, Nasir and Togelius, Julian and Mitsufuji, Yuki},
  title     = {Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12871--12879},
}
Magma: A Foundation Model for Multimodal AI Agents: Jianwei Yang,

Reuben Tan,

Qianhui Wu,

Ruijie Zheng,

Baolin Peng,

Yongyuan Liang,

Yu Gu,

Mu Cai,

Seonghyeon Ye,

Joel Jang,

Yuquan Deng,

Jianfeng Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianwei and Tan, Reuben and Wu, Qianhui and Zheng, Ruijie and Peng, Baolin and Liang, Yongyuan and Gu, Yu and Cai, Mu and Ye, Seonghyeon and Jang, Joel and Deng, Yuquan and Gao, Jianfeng},
  title     = {Magma: A Foundation Model for Multimodal {AI} Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14203--14214},
}
Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution: Zelin Li,

Chenwei Wang,

Zhaoke Huang,

Yiming Ma,

Cunming Zhao,

Zhongying Zhao,

Hong Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zelin and Wang, Chenwei and Huang, Zhaoke and Ma, Yiming and Zhao, Cunming and Zhao, Zhongying and Yan, Hong},
  title     = {Volume Tells: Dual Cycle-Consistent Diffusion for {3D} Fluorescence Microscopy De-noising and Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16091--16100},
}
Matrix3D: Large Photogrammetry Model All-in-One: Yuanxun Lu,

Jingyang Zhang,

Tian Fang,

Jean-Daniel Nahmias,

Yanghai Tsin,

Long Quan,

Xun Cao,

Yao Yao,

Shiwei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yuanxun and Zhang, Jingyang and Fang, Tian and Nahmias, Jean-Daniel and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao and Li, Shiwei},
  title     = {{Matrix3D}: Large Photogrammetry Model All-in-One},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11250--11263},
}
3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement: Yihang Luo,

Shangchen Zhou,

Yushi Lan,

Xingang Pan,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Yihang and Zhou, Shangchen and Lan, Yushi and Pan, Xingang and Loy, Chen Change},
  title     = {{3DEnhancer}: Consistent Multi-View Diffusion for {3D} Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16430--16440},
}
Investigating the Role of Weight Decay in Enhancing Nonconvex SGD: Tao Sun,

Yuhao Huang,

Li Shen,

Kele Xu,

Bao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Tao and Huang, Yuhao and Shen, Li and Xu, Kele and Wang, Bao},
  title     = {Investigating the Role of Weight Decay in Enhancing Nonconvex {SGD}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15287--15296},
}
MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures: Lucas Morin,

Valery Weber,

Ahmed Nassar,

Gerhard Ingmar Meijer,

Luc Van Gool,

Yawei Li,

Peter Staar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morin_2025_CVPR,
  author    = {Morin, Lucas and Weber, Valery and Nassar, Ahmed and Meijer, Gerhard Ingmar and Van Gool, Luc and Li, Yawei and Staar, Peter},
  title     = {{MarkushGrapher}: Joint Visual and Textual Recognition of {Markush} Structures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14505--14515},
}
Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection: Jiahao Xu,

Zikai Zhang,

Rui Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiahao and Zhang, Zikai and Hu, Rui},
  title     = {Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20654--20664},
}
BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers: Hui Zhang,

Tingwei Gao,

Jie Shao,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Hui and Gao, Tingwei and Shao, Jie and Wu, Zuxuan},
  title     = {{BlockDance}: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12891--12900},
}
Mamba-Adaptor: State Space Model Adaptor for Visual Recognition: Fei Xie,

Jiahao Nie,

Yujin Tang,

Wenkang Zhang,

Hongshen Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Fei and Nie, Jiahao and Tang, Yujin and Zhang, Wenkang and Zhao, Hongshen},
  title     = {{Mamba-Adaptor}: State Space Model Adaptor for Visual Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20124--20134},
}
Robust Message Embedding via Attention Flow-Based Steganography: Huayuan Ye,

Shenzhuo Zhang,

Shiqi Jiang,

Jing Liao,

Shuhang Gu,

Dejun Zheng,

Changbo Wang,

Chenhui Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Huayuan and Zhang, Shenzhuo and Jiang, Shiqi and Liao, Jing and Gu, Shuhang and Zheng, Dejun and Wang, Changbo and Li, Chenhui},
  title     = {Robust Message Embedding via Attention Flow-Based Steganography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12840--12849},
}
Compositional Targeted Multi-Label Universal Perturbations: Hassan Mahmood,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Mahmood_2025_CVPR,
  author    = {Mahmood, Hassan and Elhamifar, Ehsan},
  title     = {Compositional Targeted Multi-Label Universal Perturbations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20580--20591},
}
PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies: Mojtaba Nafez,

Amirhossein Koochakian,

Arad Maleki,

Jafar Habibi,

Mohammad Hossein Rohban; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nafez_2025_CVPR,
  author    = {Nafez, Mojtaba and Koochakian, Amirhossein and Maleki, Arad and Habibi, Jafar and Rohban, Mohammad Hossein},
  title     = {{PatchGuard}: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20383--20394},
}
Neural Video Compression with Context Modulation: Chuanbo Tang,

Zhuoyuan Li,

Yifan Bian,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Chuanbo and Li, Zhuoyuan and Bian, Yifan and Li, Li and Liu, Dong},
  title     = {Neural Video Compression with Context Modulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12553--12563},
}
On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events: Jesse J. Hagenaars,

Yilun Wu,

Federico Paredes-Valles,

Stein Stroobants,

Guido C.H.E. de Croon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hagenaars_2025_CVPR,
  author    = {Hagenaars, Jesse J. and Wu, Yilun and Paredes-Valles, Federico and Stroobants, Stein and de Croon, Guido C. H. E.},
  title     = {On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17114--17123},
}
Learning with Noisy Triplet Correspondence for Composed Image Retrieval: Shuxian Li,

Changhao He,

Xiting Liu,

Joey Tianyi Zhou,

Xi Peng,

Peng Hu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shuxian and He, Changhao and Liu, Xiting and Zhou, Joey Tianyi and Peng, Xi and Hu, Peng},
  title     = {Learning with Noisy Triplet Correspondence for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19628--19637},
}
Parallelized Autoregressive Visual Generation: Yuqing Wang,

Shuhuai Ren,

Zhijie Lin,

Yujin Han,

Haoyuan Guo,

Zhenheng Yang,

Difan Zou,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuqing and Ren, Shuhuai and Lin, Zhijie and Han, Yujin and Guo, Haoyuan and Yang, Zhenheng and Zou, Difan and Feng, Jiashi and Liu, Xihui},
  title     = {Parallelized Autoregressive Visual Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12955--12965},
}
CGMatch: A Different Perspective of Semi-supervised Learning: Bo Cheng,

Jueqing Lu,

Yuan Tian,

Haifeng Zhao,

Yi Chang,

Lan Du; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Bo and Lu, Jueqing and Tian, Yuan and Zhao, Haifeng and Chang, Yi and Du, Lan},
  title     = {{CGMatch}: A Different Perspective of Semi-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15381--15391},
}
FIction: 4D Future Interaction Prediction from Video: Kumar Ashutosh,

Georgios Pavlakos,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashutosh_2025_CVPR,
  author    = {Ashutosh, Kumar and Pavlakos, Georgios and Grauman, Kristen},
  title     = {{FIction}: {4D} Future Interaction Prediction from Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17613--17625},
}
D^2iT: Dynamic Diffusion Transformer for Accurate Image Generation: Weinan Jia,

Mengqi Huang,

Nan Chen,

Lei Zhang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Weinan and Huang, Mengqi and Chen, Nan and Zhang, Lei and Mao, Zhendong},
  title     = {{D$^2$iT}: Dynamic Diffusion Transformer for Accurate Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12860--12870},
}
AniDoc: Animation Creation Made Easier: Yihao Meng,

Hao Ouyang,

Hanlin Wang,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Zhiheng Liu,

Yujun Shen,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Yihao and Ouyang, Hao and Wang, Hanlin and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Liu, Zhiheng and Shen, Yujun and Qu, Huamin},
  title     = {{AniDoc}: Animation Creation Made Easier},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18187--18197},
}
LiSu: A Dataset and Method for LiDAR Surface Normal Estimation: Dušan Malić,

Christian Fruhwirth-Reisinger,

Samuel Schulter,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Malic_2025_CVPR,
  author    = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst},
  title     = {{LiSu}: A Dataset and Method for {LiDAR} Surface Normal Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17039--17049},
}
Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering: Yuanlin Wang,

Yiyang Zhang,

Ruiqin Xiong,

Jing Zhao,

Jian Zhang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuanlin and Zhang, Yiyang and Xiong, Ruiqin and Zhao, Jing and Zhang, Jian and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{Spk2SRImgNet}: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11416--11426},
}
VideoGLaMM: A Large Multimodal Model for Pixel-Level Visual Grounding in Videos: Shehan Munasinghe,

Hanan Gani,

Wenqi Zhu,

Jiale Cao,

Eric Xing,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Munasinghe_2025_CVPR,
  author    = {Munasinghe, Shehan and Gani, Hanan and Zhu, Wenqi and Cao, Jiale and Xing, Eric and Khan, Fahad Shahbaz and Khan, Salman},
  title     = {{VideoGLaMM}: A Large Multimodal Model for Pixel-Level Visual Grounding in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19036--19046},
}
ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping: Shun Iwase,

Muhammad Zubair Irshad,

Katherine Liu,

Vitor Guizilini,

Robert Lee,

Takuya Ikeda,

Ayako Amma,

Koichi Nishiwaki,

Kris Kitani,

Rares Ambrus,

Sergey Zakharov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Iwase_2025_CVPR,
  author    = {Iwase, Shun and Irshad, Muhammad Zubair and Liu, Katherine and Guizilini, Vitor and Lee, Robert and Ikeda, Takuya and Amma, Ayako and Nishiwaki, Koichi and Kitani, Kris and Ambrus, Rares and Zakharov, Sergey},
  title     = {{ZeroGrasp}: Zero-Shot Shape Reconstruction Enabled Robotic Grasping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17405--17415},
}
PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation: Jingyi Tian,

Le Wang,

Sanping Zhou,

Sen Wang,

Jiayi Li,

Haowen Sun,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Jingyi and Wang, Le and Zhou, Sanping and Wang, Sen and Li, Jiayi and Sun, Haowen and Tang, Wei},
  title     = {{PDFactor}: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15757--15767},
}
Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes: Suhyun Shin,

Seungwoo Yoon,

Ryota Maeda,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR,
  author    = {Shin, Suhyun and Yoon, Seungwoo and Maeda, Ryota and Baek, Seung-Hwan},
  title     = {Dense Dispersed Structured Light for Hyperspectral {3D} Imaging of Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16589--16598},
}
MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments: Ege Özsoy,

Chantal Pellegrini,

Tobias Czempiel,

Felix Tristram,

Kun Yuan,

David Bani-Harouni,

Ulrich Eck,

Benjamin Busam,

Matthias Keicher,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{Ozsoy_2025_CVPR,
  author    = {{\"O}zsoy, Ege and Pellegrini, Chantal and Czempiel, Tobias and Tristram, Felix and Yuan, Kun and Bani-Harouni, David and Eck, Ulrich and Busam, Benjamin and Keicher, Matthias and Navab, Nassir},
  title     = {{MM-OR}: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19378--19389},
}
Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders: Rui Chen,

Jianfeng Zhang,

Yixun Liang,

Guan Luo,

Weiyu Li,

Jiarui Liu,

Xiu Li,

Xiaoxiao Long,

Jiashi Feng,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Rui and Zhang, Jianfeng and Liang, Yixun and Luo, Guan and Li, Weiyu and Liu, Jiarui and Li, Xiu and Long, Xiaoxiao and Feng, Jiashi and Tan, Ping},
  title     = {{Dora}: Sampling and Benchmarking for {3D} Shape Variational Auto-Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16251--16261},
}
Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants: Chong Yu,

Tao Chen,

Zhongxue Gan; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Chong and Chen, Tao and Gan, Zhongxue},
  title     = {{Once-Tuning-Multiple-Variants}: Tuning Once and Expanded as Multiple Vision-Language Model Variants},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14712--14722},
}
Reconstructing Animals and the Wild: Peter Kulits,

Michael J. Black,

Silvia Zuffi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulits_2025_CVPR,
  author    = {Kulits, Peter and Black, Michael J. and Zuffi, Silvia},
  title     = {Reconstructing Animals and the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16565--16577},
}
DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving: Bencheng Liao,

Shaoyu Chen,

Haoran Yin,

Bo Jiang,

Cheng Wang,

Sixu Yan,

Xinbang Zhang,

Xiangyu Li,

Ying Zhang,

Qian Zhang,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Bencheng and Chen, Shaoyu and Yin, Haoran and Jiang, Bo and Wang, Cheng and Yan, Sixu and Zhang, Xinbang and Li, Xiangyu and Zhang, Ying and Zhang, Qian and Wang, Xinggang},
  title     = {{DiffusionDrive}: Truncated Diffusion Model for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12037--12047},
}
DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition: Caoshuo Li,

Tanzhe Li,

Xiaobin Hu,

Donghao Luo,

Taisong Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Caoshuo and Li, Tanzhe and Hu, Xiaobin and Luo, Donghao and Jin, Taisong},
  title     = {{DVHGNN}: Multi-Scale Dilated Vision {HGNN} for Efficient Vision Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20158--20168},
}
Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions: Boran Wen,

Dingbang Huang,

Zichen Zhang,

Jiahong Zhou,

Jianbin Deng,

Jingyu Gong,

Yulong Chen,

Lizhuang Ma,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Boran and Huang, Dingbang and Zhang, Zichen and Zhou, Jiahong and Deng, Jianbin and Gong, Jingyu and Chen, Yulong and Ma, Lizhuang and Li, Yong-Lu},
  title     = {Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17426--17436},
}
GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill: Jieming Cui,

Tengyu Liu,

Ziyu Meng,

Jiale Yu,

Ran Song,

Wei Zhang,

Yixin Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Jieming and Liu, Tengyu and Meng, Ziyu and Yu, Jiale and Song, Ran and Zhang, Wei and Zhu, Yixin and Huang, Siyuan},
  title     = {{GROVE}: A Generalized Reward for Learning Open-Vocabulary Physical Skill},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15781--15790},
}
GauSTAR: Gaussian Surface Tracking and Reconstruction: Chengwei Zheng,

Lixin Xue,

Juan Zarate,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Chengwei and Xue, Lixin and Zarate, Juan and Song, Jie},
  title     = {{GauSTAR}: {Gaussian} Surface Tracking and Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16543--16553},
}
Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis: Zixuan Wang,

Duo Peng,

Feng Chen,

Yuwei Yang,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zixuan and Peng, Duo and Chen, Feng and Yang, Yuwei and Lei, Yinjie},
  title     = {Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13135--13145},
}
TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning: Seungmin Baek,

Soyul Lee,

Hayeon Jo,

Hyesong Choi,

Dongbo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baek_2025_CVPR,
  author    = {Baek, Seungmin and Lee, Soyul and Jo, Hayeon and Choi, Hyesong and Min, Dongbo},
  title     = {{TADFormer}: Task-Adaptive Dynamic {TransFormer} for Efficient Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14858--14868},
}
CamPoint: Boosting Point Cloud Segmentation with Virtual Camera: Jianhui Zhang,

Yizhi Luo,

Zicheng Zhang,

Xuecheng Nie,

Bonan Li; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianhui and Luo, Yizhi and Zhang, Zicheng and Nie, Xuecheng and Li, Bonan},
  title     = {{CamPoint}: Boosting Point Cloud Segmentation with Virtual Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11822--11832},
}
MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images: Aniruddha Ganguly,

Debolina Chatterjee,

Wentao Huang,

Jie Zhang,

Alisa Yurovsky,

Travis Steele Johnson,

Chao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganguly_2025_CVPR,
  author    = {Ganguly, Aniruddha and Chatterjee, Debolina and Huang, Wentao and Zhang, Jie and Yurovsky, Alisa and Johnson, Travis Steele and Chen, Chao},
  title     = {{MERGE}: Multi-faceted Hierarchical Graph-based {GNN} for Gene Expression Prediction from Whole Slide Histopathology Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15611--15620},
}
SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models: Yongting Zhang,

Lu Chen,

Guodong Zheng,

Yifeng Gao,

Rui Zheng,

Jinlan Fu,

Zhenfei Yin,

Senjie Jin,

Yu Qiao,

Xuanjing Huang,

Feng Zhao,

Tao Gui,

Jing Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yongting and Chen, Lu and Zheng, Guodong and Gao, Yifeng and Zheng, Rui and Fu, Jinlan and Yin, Zhenfei and Jin, Senjie and Qiao, Yu and Huang, Xuanjing and Zhao, Feng and Gui, Tao and Shao, Jing},
  title     = {{SPA-VL}: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19867--19878},
}
PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model: Xiang Gao,

Shuai Yang,

Jiaying Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Xiang and Yang, Shuai and Liu, Jiaying},
  title     = {{PTDiffusion}: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18240--18249},
}
Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation: Tung-Long Vuong,

Hoang Phan,

Vy Vo,

Anh Bui,

Thanh-Toan Do,

Trung Le,

Dinh Phung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuong_2025_CVPR,
  author    = {Vuong, Tung-Long and Phan, Hoang and Vo, Vy and Bui, Anh and Do, Thanh-Toan and Le, Trung and Phung, Dinh},
  title     = {Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19974--19984},
}
Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration: Haipeng Fang,

Sheng Tang,

Juan Cao,

Enshuo Zhang,

Fan Tang,

Tong-Yee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Haipeng and Tang, Sheng and Cao, Juan and Zhang, Enshuo and Tang, Fan and Lee, Tong-Yee},
  title     = {Attend to Not Attended: Structure-then-Detail Token Merging for Post-training {DiT} Acceleration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18083--18092},
}
FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation: Sen Wang,

Le Wang,

Sanping Zhou,

Jingyi Tian,

Jiayi Li,

Haowen Sun,

Wei Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sen and Wang, Le and Zhou, Sanping and Tian, Jingyi and Li, Jiayi and Sun, Haowen and Tang, Wei},
  title     = {{FlowRAM}: Grounding Flow Matching Policy with Region-Aware {Mamba} Framework for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12176--12186},
}
Visual Lexicon: Rich Image Features in Language Space: XuDong Wang,

Xingyi Zhou,

Alireza Fathi,

Trevor Darrell,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, XuDong and Zhou, Xingyi and Fathi, Alireza and Darrell, Trevor and Schmid, Cordelia},
  title     = {Visual Lexicon: Rich Image Features in Language Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19736--19747},
}
Test-Time Visual In-Context Tuning: Jiahao Xie,

Alessio Tonioni,

Nathalie Rauschmayr,

Federico Tombari,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Jiahao and Tonioni, Alessio and Rauschmayr, Nathalie and Tombari, Federico and Schiele, Bernt},
  title     = {Test-Time Visual In-Context Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19996--20005},
}
Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models: Hao Ren,

Yiming Zeng,

Zetong Bi,

Zhaoliang Wan,

Junlong Huang,

Hui Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Hao and Zeng, Yiming and Bi, Zetong and Wan, Zhaoliang and Huang, Junlong and Cheng, Hui},
  title     = {Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12100--12110},
}
SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images: Kaiyu Li,

Ruixun Liu,

Xiangyong Cao,

Xueru Bai,

Feng Zhou,

Deyu Meng,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kaiyu and Liu, Ruixun and Cao, Xiangyong and Bai, Xueru and Zhou, Feng and Meng, Deyu and Wang, Zhi},
  title     = {{SegEarth-OV}: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10545--10556},
}
Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?: Yanbo Wang,

Jiyang Guan,

Jian Liang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yanbo and Guan, Jiyang and Liang, Jian and He, Ran},
  title     = {Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19879--19889},
}
Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization: Long Xu,

Jiakai Wang,

Haojie Hao,

Haotong Qin,

Jiejie Zhao,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Long and Wang, Jiakai and Hao, Haojie and Qin, Haotong and Zhao, Jiejie and Liu, Xianglong},
  title     = {Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13414--13423},
}
Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation: Kendong Liu,

Zhiyu Zhu,

Hui Liu,

Junhui Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kendong and Zhu, Zhiyu and Liu, Hui and Hou, Junhui},
  title     = {{Acc3D}: Accelerating Single Image to {3D} Diffusion Models via Edge Consistency Guided Score Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18031--18040},
}
Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation: Zhongwen Zhang,

Yuri Boykov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhongwen and Boykov, Yuri},
  title     = {Soft Self-labeling and {Potts} Relaxations for Weakly-supervised Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20244--20253},
}
MVSAnywhere: Zero-Shot Multi-View Stereo: Sergio Izquierdo,

Mohamed Sayed,

Michael Firman,

Guillermo Garcia-Hernando,

Daniyar Turmukhambetov,

Javier Civera,

Oisin Mac Aodha,

Gabriel Brostow,

Jamie Watson; [pdf] [arXiv]
[bibtex]
@InProceedings{Izquierdo_2025_CVPR,
  author    = {Izquierdo, Sergio and Sayed, Mohamed and Firman, Michael and Garcia-Hernando, Guillermo and Turmukhambetov, Daniyar and Civera, Javier and Mac Aodha, Oisin and Brostow, Gabriel and Watson, Jamie},
  title     = {{MVSAnywhere}: Zero-Shot Multi-View Stereo},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11493--11504},
}
Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision: Tomoya Yoshida,

Shuhei Kurita,

Taichi Nishimura,

Shinsuke Mori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoshida_2025_CVPR,
  author    = {Yoshida, Tomoya and Kurita, Shuhei and Nishimura, Taichi and Mori, Shinsuke},
  title     = {Generating {6DoF} Object Manipulation Trajectories from Action Description in Egocentric Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17370--17382},
}
BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature: Alejandro Lozano,

Min Woo Sun,

James Burgess,

Liangyu Chen,

Jeffrey J. Nirschl,

Jeffrey Gu,

Ivan Lopez,

Josiah Aklilu,

Anita Rau,

Austin Wolfgang Katzer,

Yuhui Zhang,

Collin Chiu,

Xiaohan Wang,

Alfred Seunghoon Song,

Robert Tibshirani,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lozano_2025_CVPR,
  author    = {Lozano, Alejandro and Sun, Min Woo and Burgess, James and Chen, Liangyu and Nirschl, Jeffrey J. and Gu, Jeffrey and Lopez, Ivan and Aklilu, Josiah and Rau, Anita and Katzer, Austin Wolfgang and Zhang, Yuhui and Chiu, Collin and Wang, Xiaohan and Song, Alfred Seunghoon and Tibshirani, Robert and Yeung-Levy, Serena},
  title     = {{BIOMEDICA}: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19724--19735},
}
Structure-Aware Correspondence Learning for Relative Pose Estimation: Yihan Chen,

Wenfei Yang,

Huan Ren,

Shifeng Zhang,

Tianzhu Zhang,

Feng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yihan and Yang, Wenfei and Ren, Huan and Zhang, Shifeng and Zhang, Tianzhu and Wu, Feng},
  title     = {Structure-Aware Correspondence Learning for Relative Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11611--11621},
}
PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction: Sinisa Stekovic,

Arslan Artykov,

Stefan Ainetter,

Mattia D'Urso,

Friedrich Fraundorfer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stekovic_2025_CVPR,
  author    = {Stekovic, Sinisa and Artykov, Arslan and Ainetter, Stefan and D'Urso, Mattia and Fraundorfer, Friedrich},
  title     = {{PyTorchGeoNodes}: Enabling Differentiable Shape Programs for {3D} Shape Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16283--16292},
}
FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation: Daosong Hu,

Mingyue Cui,

Kai Huang; [pdf]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Daosong and Cui, Mingyue and Huang, Kai},
  title     = {{FIFA}: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18760--18769},
}
Shape Abstraction via Marching Differentiable Support Functions: Sunkyung Park,

Jeongmin Lee,

Dongjun Lee; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Sunkyung and Lee, Jeongmin and Lee, Dongjun},
  title     = {Shape Abstraction via Marching Differentiable Support Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16902--16911},
}
Scaling Down Text Encoders of Text-to-Image Diffusion Models: Lifu Wang,

Daqing Liu,

Xinchen Liu,

Xiaodong He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lifu and Liu, Daqing and Liu, Xinchen and He, Xiaodong},
  title     = {Scaling Down Text Encoders of Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18424--18433},
}
POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation: Jian Wang,

Tianhong Dai,

Bingfeng Zhang,

Siyue Yu,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin},
  title     = {{POT}: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15055--15064},
}
SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs: Junsheng Wang,

Nieqing Cao,

Yan Ding,

Mengying Xie,

Fuqiang Gu,

Chao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junsheng and Cao, Nieqing and Ding, Yan and Xie, Mengying and Gu, Fuqiang and Chen, Chao},
  title     = {{SKE-Layout}: Spatial Knowledge Enhanced Layout Generation with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19414--19423},
}
Gaussian Eigen Models for Human Heads: Wojciech Zielonka,

Timo Bolkart,

Thabo Beeler,

Justus Thies; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zielonka_2025_CVPR,
  author    = {Zielonka, Wojciech and Bolkart, Timo and Beeler, Thabo and Thies, Justus},
  title     = {Gaussian Eigen Models for Human Heads},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15930--15940},
}
4D-Fly: Fast 4D Reconstruction from a Single Monocular Video: Diankun Wu,

Fangfu Liu,

Yi-Hsin Hung,

Yue Qian,

Xiaohang Zhan,

Yueqi Duan; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Diankun and Liu, Fangfu and Hung, Yi-Hsin and Qian, Yue and Zhan, Xiaohang and Duan, Yueqi},
  title     = {{4D-Fly}: Fast {4D} Reconstruction from a Single Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16663--16673},
}
Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising: Yuchen Wang,

Hongyuan Wang,

Lizhi Wang,

Xin Wang,

Lin Zhu,

Wanxuan Lu,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuchen and Wang, Hongyuan and Wang, Lizhi and Wang, Xin and Zhu, Lin and Lu, Wanxuan and Huang, Hua},
  title     = {Complementary Advantages: Exploiting Cross-Field Frequency Correlation for {NIR}-Assisted Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12679--12689},
}
Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation: Shivam Duggal,

Yushi Hu,

Oscar Michel,

Aniruddha Kembhavi,

William T. Freeman,

Noah A. Smith,

Ranjay Krishna,

Antonio Torralba,

Ali Farhadi,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duggal_2025_CVPR,
  author    = {Duggal, Shivam and Hu, Yushi and Michel, Oscar and Kembhavi, Aniruddha and Freeman, William T. and Smith, Noah A. and Krishna, Ranjay and Torralba, Antonio and Farhadi, Ali and Ma, Wei-Chiu},
  title     = {{Eval3D}: Interpretable and Fine-grained Evaluation for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13326--13336},
}
DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement: Yongshu Huang,

Chen Liu,

Minghang Zhu,

Sheng Ao,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yongshu and Liu, Chen and Zhu, Minghang and Ao, Sheng and Wen, Chenglu and Wang, Cheng},
  title     = {{DiffLO}: Semantic-Aware {LiDAR} Odometry with Diffusion-Based Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17050--17059},
}
Style-Editor: Text-driven Object-centric Style Editing: Jihun Park,

Jongmin Gim,

Kyoungmin Lee,

Seunghun Lee,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jihun and Gim, Jongmin and Lee, Kyoungmin and Lee, Seunghun and Im, Sunghoon},
  title     = {{Style-Editor}: Text-driven Object-centric Style Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18281--18291},
}
Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene: Tai-Yu Pan,

Sooyoung Jeon,

Mengdi Fan,

Jinsu Yoo,

Zhenyang Feng,

Mark Campbell,

Kilian Q. Weinberger,

Bharath Hariharan,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Tai-Yu and Jeon, Sooyoung and Fan, Mengdi and Yoo, Jinsu and Feng, Zhenyang and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun},
  title     = {Transfer Your Perspective: Controllable {3D} Generation from Any Viewpoint in a Driving Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12027--12036},
}
FastVLM: Efficient Vision Encoding for Vision Language Models: Pavan Kumar Anasosalu Vasu,

Fartash Faghri,

Chun-Liang Li,

Cem Koc,

Nate True,

Albert Antony,

Gokula Santhanam,

James Gabriel,

Peter Grasch,

Oncel Tuzel,

Hadi Pouransari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2025_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Li, Chun-Liang and Koc, Cem and True, Nate and Antony, Albert and Santhanam, Gokula and Gabriel, James and Grasch, Peter and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{FastVLM}: Efficient Vision Encoding for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19769--19780},
}
VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging: Yufan He,

Pengfei Guo,

Yucheng Tang,

Andriy Myronenko,

Vishwesh Nath,

Ziyue Xu,

Dong Yang,

Can Zhao,

Benjamin Simon,

Mason Belue,

Stephanie Harmon,

Baris Turkbey,

Daguang Xu,

Wenqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Yufan and Guo, Pengfei and Tang, Yucheng and Myronenko, Andriy and Nath, Vishwesh and Xu, Ziyue and Yang, Dong and Zhao, Can and Simon, Benjamin and Belue, Mason and Harmon, Stephanie and Turkbey, Baris and Xu, Daguang and Li, Wenqi},
  title     = {{VISTA3D}: A Unified Segmentation Foundation Model For {3D} Medical Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20863--20873},
}
S2D-LFE: Sparse-to-Dense Light Field Event Generation: Yutong Liu,

Wenming Weng,

Yueyi Zhang,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yutong and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {{S2D-LFE}: Sparse-to-Dense Light Field Event Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11207--11216},
}
The Art of Deception: Color Visual Illusions and Diffusion Models: Alexandra Gomez-Villa,

Kai Wang,

C. Alejandro Parraga,

Bartłomiej Twardowski,

Jesus Malo,

Javier Vazquez-Corral,

Joost van den Weijer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gomez-Villa_2025_CVPR, author = {Gomez-Villa, Alexandra and Wang, Kai and Parraga, C. Alejandro and Twardowski, Bart{\l}omiej and Malo, Jesus and Vazquez-Corral, Javier and van den Weijer, Joost}, title = {The Art of Deception: Color Visual Illusions and Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18642--18652} }
Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data: Zhiyuan Ma,

Xinyue Liang,

Rongyuan Wu,

Xiangyu Zhu,

Zhen Lei,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Zhiyuan and Liang, Xinyue and Wu, Rongyuan and Zhu, Xiangyu and Lei, Zhen and Zhang, Lei}, title = {Progressive Rendering Distillation: Adapting {Stable} {Diffusion} for Instant Text-to-Mesh Generation without {3D} Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11036--11050} }
Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?: Yancheng Cai,

Fei Yin,

Dounia Hammou,

Rafal Mantiuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR, author = {Cai, Yancheng and Yin, Fei and Hammou, Dounia and Mantiuk, Rafal}, title = {Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20039--20048} }
Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution: Fei Ye,

Adrian G. Bors; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Fei and Bors, Adrian G.}, title = {Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20512--20522} }
Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks: Cheng Lei,

Ao Li,

Hu Yao,

Ce Zhu,

Le Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_CVPR, author = {Lei, Cheng and Li, Ao and Yao, Hu and Zhu, Ce and Zhang, Le}, title = {Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in {ViT} for Pixel-Level Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14954--14964} }
SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion: Xuan Zhu,

Jijun Xiang,

Xianqi Wang,

Longliang Liu,

Yu Wang,

Hong Zhang,

Fei Guo,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Xuan and Xiang, Jijun and Wang, Xianqi and Liu, Longliang and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin}, title = {{SVDC}: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16619--16628} }
Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion: Eunji Kim,

Siwon Kim,

Minjun Park,

Rahim Entezari,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Eunji and Kim, Siwon and Park, Minjun and Entezari, Rahim and Yoon, Sungroh}, title = {Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of {Stable} {Diffusion}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13361--13370} }
Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects: Amir Barda,

Matheus Gadelha,

Vladimir G. Kim,

Noam Aigerman,

Amit H. Bermano,

Thibault Groueix; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barda_2025_CVPR, author = {Barda, Amir and Gadelha, Matheus and Kim, Vladimir G. and Aigerman, Noam and Bermano, Amit H. and Groueix, Thibault}, title = {{Instant3dit}: Multiview Inpainting for Fast Editing of {3D} Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16273--16282} }
STDD: Spatio-Temporal Dual Diffusion for Video Generation: Shuaizhen Yao,

Xiaoya Zhang,

Xin Liu,

Mengyi Liu,

Zhen Cui; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_CVPR, author = {Yao, Shuaizhen and Zhang, Xiaoya and Liu, Xin and Liu, Mengyi and Cui, Zhen}, title = {{STDD}: Spatio-Temporal Dual Diffusion for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12575--12584} }
Implicit Correspondence Learning for Image-to-Point Cloud Registration: Xinjun Li,

Wenfei Yang,

Jiacheng Deng,

Zhixin Cheng,

Xu Zhou,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Xinjun and Yang, Wenfei and Deng, Jiacheng and Cheng, Zhixin and Zhou, Xu and Zhang, Tianzhu}, title = {Implicit Correspondence Learning for Image-to-Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16922--16931} }
ILIAS: Instance-Level Image retrieval At Scale: Giorgos Kordopatis-Zilos,

Vladan Stojnić,

Anna Manko,

Pavel Suma,

Nikolaos-Antonios Ypsilantis,

Nikos Efthymiadis,

Zakaria Laskar,

Jiri Matas,

Ondrej Chum,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Kordopatis-Zilos_2025_CVPR, author = {Kordopatis-Zilos, Giorgos and Stojni{\'c}, Vladan and Manko, Anna and Suma, Pavel and Ypsilantis, Nikolaos-Antonios and Efthymiadis, Nikos and Laskar, Zakaria and Matas, Jiri and Chum, Ondrej and Tolias, Giorgos}, title = {{ILIAS}: Instance-Level Image retrieval At Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14777--14787} }
GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation: Haifeng Wu,

Shuhang Gu,

Lixin Duan,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Haifeng and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {{GeoDepth}: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11525--11535} }
SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization: Junchen Yu,

Si-Yuan Cao,

Runmin Zhang,

Chenghao Zhang,

Zhu Yu,

Shujie Chen,

Bailin Yang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Junchen and Cao, Si-Yuan and Zhang, Runmin and Zhang, Chenghao and Yu, Zhu and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang}, title = {{SSHNet}: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16685--16694} }
USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting: Kang Chen,

Jiyuan Zhang,

Zecheng Hao,

Yajing Zheng,

Tiejun Huang,

Zhaofei Yu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Kang and Zhang, Jiyuan and Hao, Zecheng and Zheng, Yajing and Huang, Tiejun and Yu, Zhaofei}, title = {{USP-Gaussian}: Unifying Spike-based Image Reconstruction, Pose Correction and {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16609--16618} }
Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity: Huaxin Zhang,

Xiaohao Xu,

Xiang Wang,

Jialong Zuo,

Xiaonan Huang,

Changxin Gao,

Shanjun Zhang,

Li Yu,

Nong Sang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Huaxin and Xu, Xiaohao and Wang, Xiang and Zuo, Jialong and Huang, Xiaonan and Gao, Changxin and Zhang, Shanjun and Yu, Li and Sang, Nong}, title = {{Holmes-VAU}: Towards Long-term Video Anomaly Understanding at Any Granularity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13843--13853} }
QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge: Xuan Shen,

Weize Ma,

Jing Liu,

Changdi Yang,

Rui Ding,

Quanyi Wang,

Henghui Ding,

Wei Niu,

Yanzhi Wang,

Pu Zhao,

Jun Lin,

Jiuxiang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Xuan and Ma, Weize and Liu, Jing and Yang, Changdi and Ding, Rui and Wang, Quanyi and Ding, Henghui and Niu, Wei and Wang, Yanzhi and Zhao, Pu and Lin, Jun and Gu, Jiuxiang}, title = {{QuartDepth}: Post-Training Quantization for Real-Time Depth Estimation on the Edge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11448--11460} }
ReWind: Understanding Long Videos with Instructed Learnable Memory: Anxhelo Diko,

Tinghuai Wang,

Wassim Swaileh,

Shiyan Sun,

Ioannis Patras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Diko_2025_CVPR, author = {Diko, Anxhelo and Wang, Tinghuai and Swaileh, Wassim and Sun, Shiyan and Patras, Ioannis}, title = {{ReWind}: Understanding Long Videos with Instructed Learnable Memory}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13734--13743} }
DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation: Xiaoliang Ju,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2025_CVPR, author = {Ju, Xiaoliang and Li, Hongsheng}, title = {{DirectTriGS}: Triplane-based {Gaussian} Splatting Field Representation for {3D} Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16229--16239} }
Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters: Zhiyang Guo,

Jinxu Xiang,

Kai Ma,

Wengang Zhou,

Houqiang Li,

Ran Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Zhiyang and Xiang, Jinxu and Ma, Kai and Zhou, Wengang and Li, Houqiang and Zhang, Ran}, title = {{Make-It-Animatable}: An Efficient Framework for Authoring Animation-Ready {3D} Characters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10783--10792} }
Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning: Xiangtao Zhang,

Sheng Li,

Ao Li,

Yipeng Liu,

Fan Zhang,

Ce Zhu,

Le Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xiangtao and Li, Sheng and Li, Ao and Liu, Yipeng and Zhang, Fan and Zhu, Ce and Zhang, Le}, title = {Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20632--20642} }
NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction: Wenyuan Zhang,

Emily Yue-ting Jia,

Junsheng Zhou,

Baorui Ma,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Jia, Emily Yue-ting and Zhou, Junsheng and Ma, Baorui and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {{NeRFPrior}: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11317--11327} }
Towards Training-free Anomaly Detection with Vision and Language Foundation Models: Jinjin Zhang,

Guodong Wang,

Yizhou Jin,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinjin and Wang, Guodong and Jin, Yizhou and Huang, Di}, title = {Towards Training-free Anomaly Detection with Vision and Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15204--15213} }
Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration: Lianxin Xie,

Bingbing Zheng,

Si Wu,

Hau San Wong; [pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Lianxin and Zheng, Bingbing and Wu, Si and Wong, Hau San}, title = {Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17821--17830} }
Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis: Jiapeng Zhu,

Ceyuan Yang,

Kecheng Zheng,

Yinghao Xu,

Zifan Shi,

Yifei Zhang,

Qifeng Chen,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jiapeng and Yang, Ceyuan and Zheng, Kecheng and Xu, Yinghao and Shi, Zifan and Zhang, Yifei and Chen, Qifeng and Shen, Yujun}, title = {Exploring Sparse {MoE} in {GANs} for Text-conditioned Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18411--18423} }
Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention: Soikat Hasan Ahmed,

Jan Finkbeiner,

Emre Neftci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahmed_2025_CVPR, author = {Ahmed, Soikat Hasan and Finkbeiner, Jan and Neftci, Emre}, title = {Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13970--13979} }
HUNet: Homotopy Unfolding Network for Image Compressive Sensing: Feiyang Shen,

Hongping Gan; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Feiyang and Gan, Hongping}, title = {{HUNet}: Homotopy Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12799--12808} }
See Further When Clear: Curriculum Consistency Model: Yunpeng Liu,

Boxiao Liu,

Yi Zhang,

Xingzhong Hou,

Guanglu Song,

Yu Liu,

Haihang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yunpeng and Liu, Boxiao and Zhang, Yi and Hou, Xingzhong and Song, Guanglu and Liu, Yu and You, Haihang}, title = {See Further When Clear: Curriculum Consistency Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18103--18112} }
PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution: Libo Zhu,

Jianze Li,

Haotong Qin,

Wenbo Li,

Yulun Zhang,

Yong Guo,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Libo and Li, Jianze and Qin, Haotong and Li, Wenbo and Zhang, Yulun and Guo, Yong and Yang, Xiaokang}, title = {{PassionSR}: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12778--12788} }
RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting: Qiyu Dai,

Xingyu Ni,

Qianfan Shen,

Wenzheng Chen,

Baoquan Chen,

Mengyu Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR, author = {Dai, Qiyu and Ni, Xingyu and Shen, Qianfan and Chen, Wenzheng and Chen, Baoquan and Chu, Mengyu}, title = {{RainyGS}: Efficient Rain Synthesis with Physically-Based {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16153--16162} }
Three-view Focal Length Recovery From Homographies: Yaqing Ding,

Viktor Kocur,

Zuzana Berger Haladova,

Qianliang Wu,

Shen Cai,

Jian Yang,

Zuzana Kukelova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_CVPR, author = {Ding, Yaqing and Kocur, Viktor and Haladova, Zuzana Berger and Wu, Qianliang and Cai, Shen and Yang, Jian and Kukelova, Zuzana}, title = {Three-view Focal Length Recovery From Homographies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11505--11514} }
RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models: Haoran Hao,

Jiaming Han,

Changsheng Li,

Yu-Feng Li,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR, author = {Hao, Haoran and Han, Jiaming and Li, Changsheng and Li, Yu-Feng and Yue, Xiangyu}, title = {{RAP}: Retrieval-Augmented Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14538--14548} }
Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation: Chanyoung Kim,

Dayun Ju,

Woojung Han,

Ming-Hsuan Yang,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Chanyoung and Ju, Dayun and Han, Woojung and Yang, Ming-Hsuan and Hwang, Seong Jae}, title = {Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15033--15042} }
Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos: Linyi Jin,

Richard Tucker,

Zhengqi Li,

David Fouhey,

Noah Snavely,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR, author = {Jin, Linyi and Tucker, Richard and Li, Zhengqi and Fouhey, David and Snavely, Noah and Holynski, Aleksander}, title = {{Stereo4D}: Learning How Things Move in {3D} from Internet Stereo Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10497--10509} }
FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation: Kefan Chen,

Chaerin Min,

Linguang Zhang,

Shreyas Hampali,

Cem Keskin,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Kefan and Min, Chaerin and Zhang, Linguang and Hampali, Shreyas and Keskin, Cem and Sridhar, Srinath}, title = {{FoundHand}: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17448--17460} }
InterDyn: Controllable Interactive Dynamics with Video Diffusion Models: Rick Akkerman,

Haiwen Feng,

Michael J. Black,

Dimitrios Tzionas,

Victoria Fernández Abrevaya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akkerman_2025_CVPR, author = {Akkerman, Rick and Feng, Haiwen and Black, Michael J. and Tzionas, Dimitrios and Abrevaya, Victoria Fern{\'a}ndez}, title = {{InterDyn}: Controllable Interactive Dynamics with Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12467--12479} }
LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models: Shenghao Fu,

Qize Yang,

Qijie Mo,

Junkai Yan,

Xihan Wei,

Jingke Meng,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Shenghao and Yang, Qize and Mo, Qijie and Yan, Junkai and Wei, Xihan and Meng, Jingke and Xie, Xiaohua and Zheng, Wei-Shi}, title = {{LLMDet}: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14987--14997} }
MagicQuill: An Intelligent Interactive Image Editing System: Zichen Liu,

Yue Yu,

Hao Ouyang,

Qiuyu Wang,

Ka Leong Cheng,

Wen Wang,

Zhiheng Liu,

Qifeng Chen,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Wang, Wen and Liu, Zhiheng and Chen, Qifeng and Shen, Yujun}, title = {{MagicQuill}: An Intelligent Interactive Image Editing System}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13072--13082} }
Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces: Chenyangguang Zhang,

Alexandros Delitzas,

Fangjinhua Wang,

Ruida Zhang,

Xiangyang Ji,

Marc Pollefeys,

Francis Engelmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyangguang and Delitzas, Alexandros and Wang, Fangjinhua and Zhang, Ruida and Ji, Xiangyang and Pollefeys, Marc and Engelmann, Francis}, title = {Open-Vocabulary Functional {3D} Scene Graphs for Real-World Indoor Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19401--19413} }
Boosting Adversarial Transferability through Augmentation in Hypothesis Space: Yu Guo,

Weiquan Liu,

Qingshan Xu,

Shijun Zheng,

Shujun Huang,

Yu Zang,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Yu and Liu, Weiquan and Xu, Qingshan and Zheng, Shijun and Huang, Shujun and Zang, Yu and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {Boosting Adversarial Transferability through Augmentation in Hypothesis Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19175--19185} }
ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap: Hala Djeghim,

Nathan Piasco,

Moussab Bennehar,

Luis Roldao,

Dzmitry Tsishkou,

Désiré Sidibé; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Djeghim_2025_CVPR, author = {Djeghim, Hala and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Sidib{\'e}, D{\'e}sir{\'e}}, title = {{ViiNeuS}: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11854--11863} }
Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing: Xuanbai Chen,

Xiang Xu,

Zhihua Li,

Tianchen Zhao,

Pietro Perona,

Qin Zhang,

Yifan Xing; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Xuanbai and Xu, Xiang and Li, Zhihua and Zhao, Tianchen and Perona, Pietro and Zhang, Qin and Xing, Yifan}, title = {Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14281--14292} }
UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation: Himangi Mittal,

Peiye Zhuang,

Hsin-Ying Lee,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mittal_2025_CVPR, author = {Mittal, Himangi and Zhuang, Peiye and Lee, Hsin-Ying and Tulsiani, Shubham}, title = {{UniPhy}: Learning a Unified Constitutive Model for Inverse Physics Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16208--16218} }
STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks: Tianqing Zhang,

Kairong Yu,

Xian Zhong,

Hongwei Wang,

Qi Xu,

Qiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Tianqing and Yu, Kairong and Zhong, Xian and Wang, Hongwei and Xu, Qi and Zhang, Qiang}, title = {{STAA-SNN}: Spatial-Temporal Attention Aggregator for Spiking Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13959--13969} }
Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning: Zijian Gao,

Wangwang Jia,

Xingxing Zhang,

Dulan Zhou,

Kele Xu,

Feng Dawei,

Yong Dou,

Xinjun Mao,

Huaimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR, author = {Gao, Zijian and Jia, Wangwang and Zhang, Xingxing and Zhou, Dulan and Xu, Kele and Dawei, Feng and Dou, Yong and Mao, Xinjun and Wang, Huaimin}, title = {Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20523--20533} }
Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval: Arun Reddy,

Alexander Martin,

Eugene Yang,

Andrew Yates,

Kate Sanders,

Kenton Murray,

Reno Kriz,

Celso M. de Melo,

Benjamin Van Durme,

Rama Chellappa; [pdf] [supp]
[bibtex]
@InProceedings{Reddy_2025_CVPR, author = {Reddy, Arun and Martin, Alexander and Yang, Eugene and Yates, Andrew and Sanders, Kate and Murray, Kenton and Kriz, Reno and de Melo, Celso M. and Van Durme, Benjamin and Chellappa, Rama}, title = {{Video-ColBERT}: Contextualized Late Interaction for Text-to-Video Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19691--19701} }
Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning: Huajie Jiang,

Zhengxian Li,

Xiaohan Yu,

Yongli Hu,

Baocai Yin,

Jian Yang,

Yuankai Qi; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Huajie and Li, Zhengxian and Yu, Xiaohan and Hu, Yongli and Yin, Baocai and Yang, Jian and Qi, Yuankai}, title = {Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20275--20285} }
VidMuse: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling: Zeyue Tian,

Zhaoyang Liu,

Ruibin Yuan,

Jiahao Pan,

Qifeng Liu,

Xu Tan,

Qifeng Chen,

Wei Xue,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR, author = {Tian, Zeyue and Liu, Zhaoyang and Yuan, Ruibin and Pan, Jiahao and Liu, Qifeng and Tan, Xu and Chen, Qifeng and Xue, Wei and Guo, Yike}, title = {{VidMuse}: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18782--18793} }
Human-centered Interactive Learning via MLLMs for Text-to-Image Person Re-identification: Yang Qin,

Chao Chen,

Zhihang Fu,

Dezhong Peng,

Xi Peng,

Peng Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR, author = {Qin, Yang and Chen, Chao and Fu, Zhihang and Peng, Dezhong and Peng, Xi and Hu, Peng}, title = {Human-centered Interactive Learning via {MLLMs} for Text-to-Image Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14390--14399} }
Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction: Donggoo Jung,

Daehyun Kim,

Guanghui Wang,

Tae Hyun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_CVPR, author = {Jung, Donggoo and Kim, Daehyun and Wang, Guanghui and Kim, Tae Hyun}, title = {Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17892--17901} }
EdgeDiff: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds: Yujun Liu,

Ruisheng Wang,

Shangfeng Huang,

Guorong Cai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yujun and Wang, Ruisheng and Huang, Shangfeng and Cai, Guorong}, title = {{EdgeDiff}: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17008--17018} }
DeNVeR: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation: Chun-Hung Wu,

Shih-Hong Chen,

Chih-Yao Hu,

Hsin-Yu Wu,

Kai-Hsin Chen,

Yu-You Chen,

Chih-Hai Su,

Chih-Kuo Lee,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Chun-Hung and Chen, Shih-Hong and Hu, Chih-Yao and Wu, Hsin-Yu and Chen, Kai-Hsin and Chen, Yu-You and Su, Chih-Hai and Lee, Chih-Kuo and Liu, Yu-Lun}, title = {{DeNVeR}: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15682--15692} }
Task-Aware Clustering for Prompting Vision-Language Models: Fusheng Hao,

Fengxiang He,

Fuxiang Wu,

Tichao Wang,

Chengqun Song,

Jun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2025_CVPR, author = {Hao, Fusheng and He, Fengxiang and Wu, Fuxiang and Wang, Tichao and Song, Chengqun and Cheng, Jun}, title = {Task-Aware Clustering for Prompting Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14745--14755} }
FSboard: Over 3 Million Characters of ASL Fingerspelling Collected via Smartphones: Manfred Georg,

Garrett Tanzer,

Esha Uboweja,

Saad Hassan,

Maximus Shengelia,

Sam Sepah,

Sean Forbes,

Thad Starner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Georg_2025_CVPR, author = {Georg, Manfred and Tanzer, Garrett and Uboweja, Esha and Hassan, Saad and Shengelia, Maximus and Sepah, Sam and Forbes, Sean and Starner, Thad}, title = {{FSboard}: Over 3 Million Characters of {ASL} Fingerspelling Collected via Smartphones}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13897--13906} }
Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of 3D Volumes: Ludwic Leonard,

Nils Thurey,

Rüdiger Westermann; [pdf] [supp]
[bibtex]
@InProceedings{Leonard_2025_CVPR, author = {Leonard, Ludwic and Thurey, Nils and Westermann, R{\"u}diger}, title = {Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of {3D} Volumes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16163--16174} }
STiL: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification: Siyi Du,

Xinzhe Luo,

Declan P. O'Regan,

Chen Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Siyi and Luo, Xinzhe and O'Regan, Declan P. and Qin, Chen},
  title     = {{STiL}: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15549--15559},
}
Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language: Yicheng Chen,

Xiangtai Li,

Yining Li,

Yanhong Zeng,

Jianzong Wu,

Xiangyu Zhao,

Kai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yicheng and Li, Xiangtai and Li, Yining and Zeng, Yanhong and Wu, Jianzong and Zhao, Xiangyu and Chen, Kai},
  title     = {{Auto Cherry-Picker}: Learning from High-quality Generative Data Driven by Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19952--19962},
}
ReRAW: RGB-to-RAW Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge: Radu Berdan,

Beril Besbinar,

Christoph Reinders,

Junji Otsuka,

Daisuke Iso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berdan_2025_CVPR,
  author    = {Berdan, Radu and Besbinar, Beril and Reinders, Christoph and Otsuka, Junji and Iso, Daisuke},
  title     = {{ReRAW}: {RGB-to-RAW} Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11833--11843},
}
HunyuanPortrait: Implicit Condition Control for Enhanced Portrait Animation: Zunnan Xu,

Zhentao Yu,

Zixiang Zhou,

Jun Zhou,

Xiaoyu Jin,

Fa-ting Hong,

Xiaozhong Ji,

Junwei Zhu,

Chengfei Cai,

Shiyu Tang,

Qin Lin,

Xiu Li,

Qinglin Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zunnan and Yu, Zhentao and Zhou, Zixiang and Zhou, Jun and Jin, Xiaoyu and Hong, Fa-ting and Ji, Xiaozhong and Zhu, Junwei and Cai, Chengfei and Tang, Shiyu and Lin, Qin and Li, Xiu and Lu, Qinglin},
  title     = {{HunyuanPortrait}: Implicit Condition Control for Enhanced Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15909--15919},
}
Zero-shot 3D Question Answering via Voxel-based Dynamic Token Compression: Hsiang-Wei Huang,

Fu-Chen Chen,

Wenhao Chai,

Che-Chun Su,

Lu Xia,

Sanghun Jung,

Cheng-Yen Yang,

Jenq-Neng Hwang,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Hsiang-Wei and Chen, Fu-Chen and Chai, Wenhao and Su, Che-Chun and Xia, Lu and Jung, Sanghun and Yang, Cheng-Yen and Hwang, Jenq-Neng and Sun, Min and Kuo, Cheng-Hao},
  title     = {Zero-shot {3D} Question Answering via Voxel-based Dynamic Token Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19424--19434},
}
Enhanced then Progressive Fusion with View Graph for Multi-View Clustering: Zhibin Dong,

Meng Liu,

Siwei Wang,

Ke Liang,

Yi Zhang,

Suyuan Liu,

Jiaqi Jin,

Xinwang Liu,

En Zhu; [pdf]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zhibin and Liu, Meng and Wang, Siwei and Liang, Ke and Zhang, Yi and Liu, Suyuan and Jin, Jiaqi and Liu, Xinwang and Zhu, En},
  title     = {Enhanced then Progressive Fusion with View Graph for Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15518--15527},
}
Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection: Farzad Beizaee,

Gregory A. Lodygensky,

Christian Desrosiers,

Jose Dolz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beizaee_2025_CVPR,
  author    = {Beizaee, Farzad and Lodygensky, Gregory A. and Desrosiers, Christian and Dolz, Jose},
  title     = {Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19088--19097},
}
Continuous 3D Perception Model with Persistent State: Qianqian Wang,

Yifei Zhang,

Aleksander Holynski,

Alexei A. Efros,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qianqian and Zhang, Yifei and Holynski, Aleksander and Efros, Alexei A. and Kanazawa, Angjoo},
  title     = {Continuous {3D} Perception Model with Persistent State},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10510--10522},
}
LP-Diff: Towards Improved Restoration of Real-World Degraded License Plate: Haoyan Gong,

Zhenrong Zhang,

Yuzheng Feng,

Anh Nguyen,

Hongbin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Haoyan and Zhang, Zhenrong and Feng, Yuzheng and Nguyen, Anh and Liu, Hongbin},
  title     = {{LP-Diff}: Towards Improved Restoration of Real-World Degraded License Plate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17831--17840},
}
FilmComposer: LLM-Driven Music Production for Silent Film Clips: Zhifeng Xie,

Qile He,

Youjia Zhu,

Qiwei He,

Mengtian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Zhifeng and He, Qile and Zhu, Youjia and He, Qiwei and Li, Mengtian},
  title     = {{FilmComposer}: {LLM}-Driven Music Production for Silent Film Clips},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13519--13528},
}
EventPSR: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera: Bohan Yu,

Jin Han,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Bohan and Han, Jin and Shi, Boxin and Sato, Imari},
  title     = {{EventPSR}: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11427--11436},
}
CASP: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video: Zhaolin Wan,

Han Qin,

Zhiyang Li,

Xiaopeng Fan,

Wangmeng Zuo,

Debin Zhao; [pdf]
[bibtex]
@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Zhaolin and Qin, Han and Li, Zhiyang and Fan, Xiaopeng and Zuo, Wangmeng and Zhao, Debin},
  title     = {{CASP}: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12605--12614},
}
Gazing at Rewards: Eye Movements as a Lens into Human and AI Decision-Making in Hybrid Visual Foraging: Bo Wang,

Dingwei Tan,

Yen-Ling Kuo,

Zhaowei Sun,

Jeremy M. Wolfe,

Tat-Jen Cham,

Mengmi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bo and Tan, Dingwei and Kuo, Yen-Ling and Sun, Zhaowei and Wolfe, Jeremy M. and Cham, Tat-Jen and Zhang, Mengmi},
  title     = {Gazing at Rewards: Eye Movements as a Lens into Human and {AI} Decision-Making in Hybrid Visual Foraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14810--14823},
}
FOCUS: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification: Zhengrui Guo,

Conghao Xiong,

Jiabo Ma,

Qichen Sun,

Lishuang Feng,

Jinzhuo Wang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhengrui and Xiong, Conghao and Ma, Jiabo and Sun, Qichen and Feng, Lishuang and Wang, Jinzhuo and Chen, Hao},
  title     = {{FOCUS}: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15590--15600},
}
GRAE-3DMOT: Geometry Relation-Aware Encoder for Online 3D Multi-Object Tracking: Hyunseop Kim,

Hyo-Jun Lee,

Yonguk Lee,

Jinu Lee,

Hanul Kim,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hyunseop and Lee, Hyo-Jun and Lee, Yonguk and Lee, Jinu and Kim, Hanul and Koh, Yeong Jun},
  title     = {{GRAE-3DMOT}: Geometry Relation-Aware Encoder for Online {3D} Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11697--11706},
}
Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression: Xiaoyi Qu,

David Aponte,

Colby Banbury,

Daniel P. Robinson,

Tianyu Ding,

Kazuhito Koishida,

Ilya Zharkov,

Tianyi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Xiaoyi and Aponte, David and Banbury, Colby and Robinson, Daniel P. and Ding, Tianyu and Koishida, Kazuhito and Zharkov, Ilya and Chen, Tianyi},
  title     = {Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15234--15244},
}
MAR-3D: Progressive Masked Auto-regressor for High-Resolution 3D Generation: Jinnan Chen,

Lingting Zhu,

Zeyu Hu,

Shengju Qian,

Yugang Chen,

Xin Wang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jinnan and Zhu, Lingting and Hu, Zeyu and Qian, Shengju and Chen, Yugang and Wang, Xin and Lee, Gim Hee},
  title     = {{MAR-3D}: Progressive Masked Auto-regressor for High-Resolution {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11083--11092},
}
Synthetic Prior for Few-Shot Drivable Head Avatar Inversion: Wojciech Zielonka,

Stephan J. Garbin,

Alexandros Lattas,

George Kopanas,

Paulo Gotardo,

Thabo Beeler,

Justus Thies,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zielonka_2025_CVPR,
  author    = {Zielonka, Wojciech and Garbin, Stephan J. and Lattas, Alexandros and Kopanas, George and Gotardo, Paulo and Beeler, Thabo and Thies, Justus and Bolkart, Timo},
  title     = {Synthetic Prior for Few-Shot Drivable Head Avatar Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10735--10746},
}
Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach: Steeven Janny,

Hervé Poirier,

Leonid Antsfeld,

Guillaume Bono,

Gianluca Monaci,

Boris Chidlovskii,

Francesco Giuliari,

Alessio Del Bue,

Christian Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Janny_2025_CVPR,
  author    = {Janny, Steeven and Poirier, Herv{\'e} and Antsfeld, Leonid and Bono, Guillaume and Monaci, Gianluca and Chidlovskii, Boris and Giuliari, Francesco and Del Bue, Alessio and Wolf, Christian},
  title     = {Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12111--12121},
}
DFormerv2: Geometry Self-Attention for RGBD Semantic Segmentation: Bo-Wen Yin,

Jiao-Long Cao,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Bo-Wen and Cao, Jiao-Long and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{DFormerv2}: Geometry Self-Attention for {RGBD} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19345--19355},
}
GroomLight: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling: Yang Zheng,

Menglei Chai,

Delio Vicini,

Yuxiao Zhou,

Yinghao Xu,

Leonidas Guibas,

Gordon Wetzstein,

Thabo Beeler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yang and Chai, Menglei and Vicini, Delio and Zhou, Yuxiao and Xu, Yinghao and Guibas, Leonidas and Wetzstein, Gordon and Beeler, Thabo},
  title     = {{GroomLight}: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16040--16050},
}
Sea-ing in Low-light: Nisha Varghese,

A. N. Rajagopalan; [pdf] [supp]
[bibtex]
@InProceedings{Varghese_2025_CVPR,
  author    = {Varghese, Nisha and Rajagopalan, A. N.},
  title     = {Sea-ing in Low-light},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16629--16640},
}
Generative Modeling of Class Probability for Multi-Modal Representation Learning: JungKyoo Shin,

Bumsoo Kim,

Eunwoo Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR,
  author    = {Shin, JungKyoo and Kim, Bumsoo and Kim, Eunwoo},
  title     = {Generative Modeling of Class Probability for Multi-Modal Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20737--20746},
}
VisionZip: Longer is Better but Not Necessary in Vision Language Models: Senqiao Yang,

Yukang Chen,

Zhuotao Tian,

Chengyao Wang,

Jingyao Li,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Senqiao and Chen, Yukang and Tian, Zhuotao and Wang, Chengyao and Li, Jingyao and Yu, Bei and Jia, Jiaya},
  title     = {{VisionZip}: Longer is Better but Not Necessary in Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19792--19802},
}
BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing: Yunqi Gu,

Ian Huang,

Jihyeon Je,

Guandao Yang,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yunqi and Huang, Ian and Je, Jihyeon and Yang, Guandao and Guibas, Leonidas},
  title     = {{BlenderGym}: Benchmarking Foundational Model Systems for Graphics Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18574--18583},
}
VoteFlow: Enforcing Local Rigidity in Self-Supervised Scene Flow: Yancong Lin,

Shiming Wang,

Liangliang Nan,

Julian Kooij,

Holger Caesar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yancong and Wang, Shiming and Nan, Liangliang and Kooij, Julian and Caesar, Holger},
  title     = {{VoteFlow}: Enforcing Local Rigidity in Self-Supervised Scene Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17155--17164},
}
Uncertainty Weighted Gradients for Model Calibration: Jinxu Lin,

Linwei Tao,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jinxu and Tao, Linwei and Dong, Minjing and Xu, Chang},
  title     = {Uncertainty Weighted Gradients for Model Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15497--15507},
}
FFaceNeRF: Few-shot Face Editing in Neural Radiance Fields: Kwan Yun,

Chaelin Kim,

Hangyeul Shin,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Kwan and Kim, Chaelin and Shin, Hangyeul and Noh, Junyong},
  title     = {{FFaceNeRF}: Few-shot Face Editing in Neural Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10825--10835},
}
Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation: Fangyun Wei,

Jinjing Zhao,

Kun Yan,

Chang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Fangyun and Zhao, Jinjing and Yan, Kun and Xu, Chang},
  title     = {Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19304--19314},
}
Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers: Haoran You,

Connelly Barnes,

Yuqian Zhou,

Yan Kang,

Zhenbang Du,

Wei Zhou,

Lingzhi Zhang,

Yotam Nitzan,

Xiaoyang Liu,

Zhe Lin,

Eli Shechtman,

Sohrab Amirghodsi,

Yingyan Celine Lin; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Haoran and Barnes, Connelly and Zhou, Yuqian and Kang, Yan and Du, Zhenbang and Zhou, Wei and Zhang, Lingzhi and Nitzan, Yotam and Liu, Xiaoyang and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Lin, Yingyan Celine},
  title     = {Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18072--18082},
}
Zero-shot RGB-D Point Cloud Registration with Pre-trained Large Vision Model: Haobo Jiang,

Jin Xie,

Jian Yang,

Liang Yu,

Jianmin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin},
  title     = {Zero-shot {RGB-D} Point Cloud Registration with Pre-trained Large Vision Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16943--16952},
}
DistinctAD: Distinctive Audio Description Generation in Contexts: Bo Fang,

Wenhao Wu,

Qiangqiang Wu,

Yuxin Song,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Bo and Wu, Wenhao and Wu, Qiangqiang and Song, Yuxin and Chan, Antoni B.},
  title     = {{DistinctAD}: Distinctive Audio Description Generation in Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13571--13581},
}
CL-MoE: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering: Tianyu Huai,

Jie Zhou,

Xingjiao Wu,

Qin Chen,

Qingchun Bai,

Ze Zhou,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Huai_2025_CVPR,
  author    = {Huai, Tianyu and Zhou, Jie and Wu, Xingjiao and Chen, Qin and Bai, Qingchun and Zhou, Ze and He, Liang},
  title     = {{CL-MoE}: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19608--19617},
}
Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression: Boqian Zhang,

Shen Yang,

Hao Chen,

Chao Yang,

Jing Jia,

Guang Jiang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Boqian and Yang, Shen and Chen, Hao and Yang, Chao and Jia, Jing and Jiang, Guang},
  title     = {Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16987--16996},
}
Trajectory Mamba: Efficient Attention-Mamba Forecasting Model Based on Selective SSM: Yizhou Huang,

Yihua Cheng,

Kezhi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yizhou and Cheng, Yihua and Wang, Kezhi},
  title     = {{Trajectory Mamba}: Efficient Attention-{Mamba} Forecasting Model Based on Selective {SSM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12058--12067},
}
VLOGGER: Multimodal Diffusion for Embodied Avatar Synthesis: Enric Corona,

Andrei Zanfir,

Eduard Gabriel Bazavan,

Nikos Kolotouros,

Thiemo Alldieck,

Cristian Sminchisescu; [pdf] [arXiv]
[bibtex]
@InProceedings{Corona_2025_CVPR,
  author    = {Corona, Enric and Zanfir, Andrei and Bazavan, Eduard Gabriel and Kolotouros, Nikos and Alldieck, Thiemo and Sminchisescu, Cristian},
  title     = {{VLOGGER}: Multimodal Diffusion for Embodied Avatar Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15896--15908},
}
DEIM: DETR with Improved Matching for Fast Convergence: Shihua Huang,

Zhichao Lu,

Xiaodong Cun,

Yongjun Yu,

Xiao Zhou,

Xi Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shihua and Lu, Zhichao and Cun, Xiaodong and Yu, Yongjun and Zhou, Xiao and Shen, Xi},
  title     = {{DEIM}: {DETR} with Improved Matching for Fast Convergence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15162--15171},
}
Human Motion Instruction Tuning: Lei Li,

Sen Jia,

Jianhao Wang,

Zhongyu Jiang,

Feng Zhou,

Ju Dai,

Tianfang Zhang,

Zongkai Wu,

Jenq-Neng Hwang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lei and Jia, Sen and Wang, Jianhao and Jiang, Zhongyu and Zhou, Feng and Dai, Ju and Zhang, Tianfang and Wu, Zongkai and Hwang, Jenq-Neng},
  title     = {Human Motion Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17582--17591},
}
A Flag Decomposition for Hierarchical Datasets: Nathan Mankovich,

Ignacio Santamaria,

Gustau Camps-Valls,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mankovich_2025_CVPR,
  author    = {Mankovich, Nathan and Santamaria, Ignacio and Camps-Valls, Gustau and Birdal, Tolga},
  title     = {A Flag Decomposition for Hierarchical Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18738--18748},
}
RCP-Bench: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions: Shihang Du,

Sanqing Qu,

Tianhang Wang,

Xudong Zhang,

Yunwei Zhu,

Jian Mao,

Fan Lu,

Qiao Lin,

Guang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Shihang and Qu, Sanqing and Wang, Tianhang and Zhang, Xudong and Zhu, Yunwei and Mao, Jian and Lu, Fan and Lin, Qiao and Chen, Guang},
  title     = {{RCP-Bench}: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11908--11918},
}
Olympus: A Universal Task Router for Computer Vision Tasks: Yuanze Lin,

Yunsheng Li,

Dongdong Chen,

Weijian Xu,

Ronald Clark,

Philip Torr; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yuanze and Li, Yunsheng and Chen, Dongdong and Xu, Weijian and Clark, Ronald and Torr, Philip},
  title     = {Olympus: A Universal Task Router for Computer Vision Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14235--14246},
}
Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning: Stefan Smeu,

Dragos-Alexandru Boldisor,

Dan Oneata,

Elisabeta Oneata; [pdf] [arXiv]
[bibtex]
@InProceedings{Smeu_2025_CVPR,
  author    = {Smeu, Stefan and Boldisor, Dragos-Alexandru and Oneata, Dan and Oneata, Elisabeta},
  title     = {Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18815--18825},
}
Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching: Bin Wang,

Fan Wu,

Linke Ouyang,

Zhuangcheng Gu,

Rui Zhang,

Renqiu Xia,

Botian Shi,

Bo Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bin and Wu, Fan and Ouyang, Linke and Gu, Zhuangcheng and Zhang, Rui and Xia, Renqiu and Shi, Botian and Zhang, Bo and He, Conghui},
  title     = {Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19681--19690},
}
Improving Semi-Supervised Semantic Segmentation with Sliced-Wasserstein Feature Alignment and Uniformity: Chen-Yi Lu,

Kasra Derakhshandeh,

Somali Chaterji; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Chen-Yi and Derakhshandeh, Kasra and Chaterji, Somali},
  title     = {Improving Semi-Supervised Semantic Segmentation with Sliced-{Wasserstein} Feature Alignment and Uniformity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20233--20243},
}
3D-Mem: 3D Scene Memory for Embodied Exploration and Reasoning: Yuncong Yang,

Han Yang,

Jiachen Zhou,

Peihao Chen,

Hongxin Zhang,

Yilun Du,

Chuang Gan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuncong and Yang, Han and Zhou, Jiachen and Chen, Peihao and Zhang, Hongxin and Du, Yilun and Gan, Chuang},
  title     = {{3D-Mem}: {3D} Scene Memory for Embodied Exploration and Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17294--17303},
}
Navigation World Models: Amir Bar,

Gaoyue Zhou,

Danny Tran,

Trevor Darrell,

Yann LeCun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bar_2025_CVPR,
  author    = {Bar, Amir and Zhou, Gaoyue and Tran, Danny and Darrell, Trevor and LeCun, Yann},
  title     = {Navigation World Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15791--15801},
}
Conformal Prediction and MLLM aided Uncertainty Quantification in Scene Graph Generation: Sayak Nag,

Udita Ghosh,

Calvin-Khang Ta,

Sarosij Bose,

Jiachen Li,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nag_2025_CVPR,
  author    = {Nag, Sayak and Ghosh, Udita and Ta, Calvin-Khang and Bose, Sarosij and Li, Jiachen and Roy-Chowdhury, Amit K.},
  title     = {Conformal Prediction and {MLLM} aided Uncertainty Quantification in Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11676--11686},
}
Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning: Buzhen Huang,

Chen Li,

Chongyang Xu,

Dongyue Lu,

Jinnan Chen,

Yangang Wang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Lu, Dongyue and Chen, Jinnan and Wang, Yangang and Lee, Gim Hee},
  title     = {Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17475--17485},
}
Scenario Dreamer: Vectorized Latent Diffusion for Generating Driving Simulation Environments: Luke Rowe,

Roger Girgis,

Anthony Gosselin,

Liam Paull,

Christopher Pal,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rowe_2025_CVPR,
  author    = {Rowe, Luke and Girgis, Roger and Gosselin, Anthony and Paull, Liam and Pal, Christopher and Heide, Felix},
  title     = {{Scenario Dreamer}: Vectorized Latent Diffusion for Generating Driving Simulation Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17207--17218},
}
Poly-Autoregressive Prediction for Modeling Interactions: Neerja Thakkar,

Tara Sadjadpour,

Jathushan Rajasegeran,

Shiry Ginosar,

Jitendra Malik; [pdf] [supp]
[bibtex]
@InProceedings{Thakkar_2025_CVPR,
  author    = {Thakkar, Neerja and Sadjadpour, Tara and Rajasegeran, Jathushan and Ginosar, Shiry and Malik, Jitendra},
  title     = {Poly-Autoregressive Prediction for Modeling Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12402--12412},
}
PoseBH: Prototypical Multi-Dataset Training Beyond Human Pose Estimation: Uyoung Jeong,

Jonathan Freer,

Seungryul Baek,

Hyung Jin Chang,

Kwang In Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Uyoung and Freer, Jonathan and Baek, Seungryul and Chang, Hyung Jin and Kim, Kwang In},
  title     = {{PoseBH}: Prototypical Multi-Dataset Training Beyond Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12278--12288},
}
Decision SpikeFormer: Spike-Driven Transformer for Decision Making: Wei Huang,

Qinying Gu,

Nanyang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wei and Gu, Qinying and Ye, Nanyang},
  title     = {{Decision SpikeFormer}: Spike-Driven Transformer for Decision Making},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19241--19250},
}
Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels: Quanjiang Li,

Tingjin Luo,

Jiahui Liao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Quanjiang and Luo, Tingjin and Liao, Jiahui},
  title     = {Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20706--20715},
}
EMOE: Modality-Specific Enhanced Dynamic Emotion Experts: Yiyang Fang,

Wenke Huang,

Guancheng Wan,

Kehua Su,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yiyang and Huang, Wenke and Wan, Guancheng and Su, Kehua and Ye, Mang},
  title     = {{EMOE}: Modality-Specific Enhanced Dynamic Emotion Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14314--14324},
}
Generative Video Propagation: Shaoteng Liu,

Tianyu Wang,

Jui-Hsien Wang,

Qing Liu,

Zhifei Zhang,

Joon-Young Lee,

Yijun Li,

Bei Yu,

Zhe Lin,

Soo Ye Kim,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoteng and Wang, Tianyu and Wang, Jui-Hsien and Liu, Qing and Zhang, Zhifei and Lee, Joon-Young and Li, Yijun and Yu, Bei and Lin, Zhe and Kim, Soo Ye and Jia, Jiaya},
  title     = {Generative Video Propagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17712--17722},
}
From Multimodal LLMs to Generalist Embodied Agents: Methods and Lessons: Andrew Szot,

Bogdan Mazoure,

Omar Attia,

Aleksei Timofeev,

Harsh Agrawal,

Devon Hjelm,

Zhe Gan,

Zsolt Kira,

Alexander Toshev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szot_2025_CVPR,
  author    = {Szot, Andrew and Mazoure, Bogdan and Attia, Omar and Timofeev, Aleksei and Agrawal, Harsh and Hjelm, Devon and Gan, Zhe and Kira, Zsolt and Toshev, Alexander},
  title     = {From Multimodal {LLMs} to Generalist Embodied Agents: Methods and Lessons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10644--10655},
}
Mosaic3D: Foundation Dataset and Model for Open-Vocabulary 3D Segmentation: Junha Lee,

Chunghyun Park,

Jaesung Choe,

Yu-Chiang Frank Wang,

Jan Kautz,

Minsu Cho,

Chris Choy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Junha and Park, Chunghyun and Choe, Jaesung and Wang, Yu-Chiang Frank and Kautz, Jan and Cho, Minsu and Choy, Chris},
  title     = {{Mosaic3D}: Foundation Dataset and Model for Open-Vocabulary {3D} Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14089--14101}
}
T-CIL: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning: Seong-Hyeon Hwang,

Minsu Kim,

Steven Euijong Whang; [pdf] [supp]
[bibtex]
@InProceedings{Hwang_2025_CVPR,
  author    = {Hwang, Seong-Hyeon and Kim, Minsu and Whang, Steven Euijong},
  title     = {{T-CIL}: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15339--15348}
}
LoRA Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning: Xuan Liu,

Xiaobin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xuan and Chang, Xiaobin},
  title     = {{LoRA} Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15308--15318}
}
AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer: Jin Lyu,

Tianyi Zhu,

Yi Gu,

Li Lin,

Pujin Cheng,

Yebin Liu,

Xiaoying Tang,

Liang An; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_CVPR,
  author    = {Lyu, Jin and Zhu, Tianyi and Gu, Yi and Lin, Li and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying and An, Liang},
  title     = {{AniMer}: Animal Pose and Shape Estimation Using Family Aware Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17486--17496}
}
Co-op: Correspondence-based Novel Object Pose Estimation: Sungphill Moon,

Hyeontae Son,

Dongcheol Hur,

Sangwook Kim; [pdf] [supp]
[bibtex]
@InProceedings{Moon_2025_CVPR,
  author    = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook},
  title     = {Co-op: Correspondence-based Novel Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11622--11632}
}
CATANet: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution: Xin Liu,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {{CATANet}: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17902--17912}
}
RayFlow: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories: Huiyang Shao,

Xin Xia,

Yuhong Yang,

Yuxi Ren,

Xing Wang,

Xuefeng Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Huiyang and Xia, Xin and Yang, Yuhong and Ren, Yuxi and Wang, Xing and Xiao, Xuefeng},
  title     = {{RayFlow}: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18113--18123}
}
Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration: Aocheng Li,

James R. Zimmer-Dauphinee,

Rajesh Kalyanam,

Ian Lindsay,

Parker VanValkenburgh,

Steven Wernke,

Daniel Aliaga; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Aocheng and Zimmer-Dauphinee, James R. and Kalyanam, Rajesh and Lindsay, Ian and VanValkenburgh, Parker and Wernke, Steven and Aliaga, Daniel},
  title     = {Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11759--11768}
}
Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks: Peng Xie,

Yequan Bie,

Jianda Mao,

Yangqiu Song,

Yang Wang,

Hao Chen,

Kani Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Peng and Bie, Yequan and Mao, Jianda and Song, Yangqiu and Wang, Yang and Chen, Hao and Chen, Kani},
  title     = {Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14679--14689}
}
Rate-In: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation: Tal Zeevi,

Ravid Shwartz-Ziv,

Yann LeCun,

Lawrence H. Staib,

John A. Onofrey; [pdf] [supp]
[bibtex]
@InProceedings{Zeevi_2025_CVPR,
  author    = {Zeevi, Tal and Shwartz-Ziv, Ravid and LeCun, Yann and Staib, Lawrence H. and Onofrey, John A.},
  title     = {{Rate-In}: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20757--20766}
}
Thin-Shell-SfT: Fine-Grained Monocular Non-rigid 3D Surface Tracking with Neural Deformation Fields: Navami Kairanda,

Marc Habermann,

Shanthika Naik,

Christian Theobalt,

Vladislav Golyanik; [pdf] [supp]
[bibtex]
@InProceedings{Kairanda_2025_CVPR,
  author    = {Kairanda, Navami and Habermann, Marc and Naik, Shanthika and Theobalt, Christian and Golyanik, Vladislav},
  title     = {{Thin-Shell-SfT}: Fine-Grained Monocular Non-rigid {3D} Surface Tracking with Neural Deformation Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11373--11383}
}
DeCLIP: Decoupled Learning for Open-Vocabulary Dense Perception: Junjie Wang,

Bin Chen,

Yulin Li,

Bin Kang,

Yichi Chen,

Zhuotao Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junjie and Chen, Bin and Li, Yulin and Kang, Bin and Chen, Yichi and Tian, Zhuotao},
  title     = {{DeCLIP}: Decoupled Learning for Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14824--14834}
}
SocialGesture: Delving into Multi-person Gesture Understanding: Xu Cao,

Pranav Virupaksha,

Wenqi Jia,

Bolin Lai,

Fiona Ryan,

Sangmin Lee,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Xu and Virupaksha, Pranav and Jia, Wenqi and Lai, Bolin and Ryan, Fiona and Lee, Sangmin and Rehg, James M.},
  title     = {{SocialGesture}: Delving into Multi-person Gesture Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19509--19519}
}
Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information: Hang Shi,

Changxi Chi,

Peng Wan,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Hang and Chi, Changxi and Wan, Peng and Zhang, Daoqiang and Shao, Wei},
  title     = {Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20810--20819}
}
Question-Aware Gaussian Experts for Audio-Visual Question Answering: Hongyeob Kim,

Inyoung Jung,

Dayoon Suh,

Youjia Zhang,

Sangmin Lee,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hongyeob and Jung, Inyoung and Suh, Dayoon and Zhang, Youjia and Lee, Sangmin and Hong, Sungeun},
  title     = {Question-Aware {Gaussian} Experts for Audio-Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13681--13690}
}
Adaptive Rectangular Convolution for Remote Sensing Pansharpening: Xueyang Wang,

Zhixin Zheng,

Jiandong Shao,

Yule Duan,

Liang-Jian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xueyang and Zheng, Zhixin and Shao, Jiandong and Duan, Yule and Deng, Liang-Jian},
  title     = {Adaptive Rectangular Convolution for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17872--17881}
}
UIBDiffusion: Universal Imperceptible Backdoor Attack for Diffusion Models: Yuning Han,

Bingyin Zhao,

Rui Chu,

Feng Luo,

Biplab Sikdar,

Yingjie Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yuning and Zhao, Bingyin and Chu, Rui and Luo, Feng and Sikdar, Biplab and Lao, Yingjie},
  title     = {{UIBDiffusion}: Universal Imperceptible Backdoor Attack for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19186--19196}
}
FisherTune: Fisher-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation: Dong Zhao,

Jinlong Li,

Shuang Wang,

Mengyao Wu,

Qi Zang,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Dong and Li, Jinlong and Wang, Shuang and Wu, Mengyao and Zang, Qi and Sebe, Nicu and Zhong, Zhun},
  title     = {{FisherTune}: {Fisher}-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15043--15054}
}
AdMiT: Adaptive Multi-Source Tuning in Dynamic Environments: Xiangyu Chang,

Fahim Faisal Niloy,

Sk Miraj Ahmed,

Srikanth V. Krishnamurthy,

Basak Guler,

Ananthram Swami,

Samet Oymak,

Amit Roy-Chowdhury; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Xiangyu and Niloy, Fahim Faisal and Ahmed, Sk Miraj and Krishnamurthy, Srikanth V. and Guler, Basak and Swami, Ananthram and Oymak, Samet and Roy-Chowdhury, Amit},
  title     = {{AdMiT}: Adaptive Multi-Source Tuning in Dynamic Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20569--20579}
}
Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing: Yanjun Li,

Zhaoyang Li,

Honghui Chen,

Lizhi Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yanjun and Li, Zhaoyang and Chen, Honghui and Xu, Lizhi},
  title     = {Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19047--19056}
}
Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation: Bolin Lai,

Felix Juefei-Xu,

Miao Liu,

Xiaoliang Dai,

Nikhil Mehta,

Chenguang Zhu,

Zeyi Huang,

James M. Rehg,

Sangmin Lee,

Ning Zhang,

Tong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Bolin and Juefei-Xu, Felix and Liu, Miao and Dai, Xiaoliang and Mehta, Nikhil and Zhu, Chenguang and Huang, Zeyi and Rehg, James M. and Lee, Sangmin and Zhang, Ning and Xiao, Tong},
  title     = {Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18346--18357}
}
Revisiting Generative Replay for Class Incremental Object Detection: Shizhou Zhang,

Xueqiang Lv,

Yinghui Xing,

Qirui Wu,

Di Xu,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shizhou and Lv, Xueqiang and Xing, Yinghui and Wu, Qirui and Xu, Di and Zhang, Yanning},
  title     = {Revisiting Generative Replay for Class Incremental Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20340--20349}
}
Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence: Qiyang Qian,

Hansheng Chen,

Masayoshi Tomizuka,

Kurt Keutzer,

Qianqian Wang,

Chenfeng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Qiyang and Chen, Hansheng and Tomizuka, Masayoshi and Keutzer, Kurt and Wang, Qianqian and Xu, Chenfeng},
  title     = {Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11579--11589}
}
Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis: Woojung Han,

Yeonkyung Lee,

Chanyoung Kim,

Kwanghyun Park,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Woojung and Lee, Yeonkyung and Kim, Chanyoung and Park, Kwanghyun and Hwang, Seong Jae},
  title     = {Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18401--18410}
}
MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data: Zifan Wang,

Ziqing Chen,

Junyu Chen,

Jilong Wang,

Yuxin Yang,

Yunze Liu,

Xueyi Liu,

He Wang,

Li Yi; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zifan and Chen, Ziqing and Chen, Junyu and Wang, Jilong and Yang, Yuxin and Liu, Yunze and Liu, Xueyi and Wang, He and Yi, Li},
  title     = {{MobileH2R}: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17315--17325}
}
PERSE: Personalized 3D Generative Avatars from A Single Portrait: Hyunsoo Cha,

Inhee Lee,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2025_CVPR,
  author    = {Cha, Hyunsoo and Lee, Inhee and Joo, Hanbyul},
  title     = {{PERSE}: Personalized {3D} Generative Avatars from A Single Portrait},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15953--15962}
}
Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation: Bohao Zhang,

Xuejiao Wang,

Changbo Wang,

Gaoqi He; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bohao and Wang, Xuejiao and Wang, Changbo and He, Gaoqi},
  title     = {Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10701--10711}
}
ACL: Activating Capability of Linear Attention for Image Restoration: Yubin Gu,

Yuan Meng,

Jiayi Ji,

Xiaoshuai Sun; [pdf]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yubin and Meng, Yuan and Ji, Jiayi and Sun, Xiaoshuai},
  title     = {{ACL}: Activating Capability of Linear Attention for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17913--17923}
}
MARBLE: Material Recomposition and Blending in CLIP-Space: Ta Ying Cheng,

Prafull Sharma,

Mark Boss,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Ta Ying and Sharma, Prafull and Boss, Mark and Jampani, Varun},
  title     = {{MARBLE}: Material Recomposition and Blending in {CLIP}-Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13061--13071}
}
Efficient Visual State Space Model for Image Deblurring: Lingshun Kong,

Jiangxin Dong,

Jinhui Tang,

Ming-Hsuan Yang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Lingshun and Dong, Jiangxin and Tang, Jinhui and Yang, Ming-Hsuan and Pan, Jinshan},
  title     = {Efficient Visual State Space Model for Image Deblurring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12710--12719}
}
Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels: Pierre Vuillecard,

Jean-Marc Odobez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuillecard_2025_CVPR,
  author    = {Vuillecard, Pierre and Odobez, Jean-Marc},
  title     = {Enhancing {3D} Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13508--13518}
}
Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward: Zhiwei Jia,

Yuesong Nan,

Huixi Zhao,

Gengdai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zhiwei and Nan, Yuesong and Zhao, Huixi and Liu, Gengdai},
  title     = {Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12912--12922}
}
Detecting Out-of-Distribution Through the Lens of Neural Collapse: Litian Liu,

Yao Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Litian and Qin, Yao},
  title     = {Detecting Out-of-Distribution Through the Lens of Neural Collapse},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15424--15433}
}
Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views: Jiang Wu,

Rui Li,

Yu Zhu,

Rong Guo,

Jinqiu Sun,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jiang and Li, Rui and Zhu, Yu and Guo, Rong and Sun, Jinqiu and Zhang, Yanning},
  title     = {{Sparse2DGS}: Geometry-Prioritized {Gaussian} Splatting for Surface Reconstruction from Sparse Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11307--11316}
}
VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation: Saksham Singh Kushwaha,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kushwaha_2025_CVPR,
  author    = {Kushwaha, Saksham Singh and Tian, Yapeng},
  title     = {{VinTAGe}: Joint Video and Text Conditioning for Holistic Audio Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13529--13539}
}
Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression: Zhenqi Dai,

Ting Liu,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Zhenqi and Liu, Ting and Zhang, Yanning},
  title     = {Efficient Decoupled Feature {3D} {Gaussian} Splatting via Hierarchical Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11156--11166}
}
CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model: Ziyu Yao,

Xuxin Cheng,

Zhiqi Huang,

Lei Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Ziyu and Cheng, Xuxin and Huang, Zhiqi and Li, Lei},
  title     = {{CountLLM}: Towards Generalizable Repetitive Action Counting via Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19143--19153}
}
SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images: Zixuan Huang,

Mark Boss,

Aaryaman Vasishta,

James M. Rehg,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zixuan and Boss, Mark and Vasishta, Aaryaman and Rehg, James M. and Jampani, Varun},
  title     = {{SPAR3D}: Stable Point-Aware Reconstruction of {3D} Objects from Single Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16860--16870}
}
Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation: Xiaoying Xing,

Avinab Saha,

Junfeng He,

Susan Hao,

Paul Vicol,

Moonkyung Ryu,

Gang Li,

Sahil Singla,

Sarah Young,

Yinxiao Li,

Feng Yang,

Deepak Ramachandran; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Xiaoying and Saha, Avinab and He, Junfeng and Hao, Susan and Vicol, Paul and Ryu, Moonkyung and Li, Gang and Singla, Sahil and Young, Sarah and Li, Yinxiao and Yang, Feng and Ramachandran, Deepak},
  title     = {{Focus-N-Fix}: Region-Aware Fine-Tuning for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18486--18496}
}
Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret: Yucong Dai,

Shilin Gu,

Ruidong Fan,

Chao Xu,

Chenping Hou; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Yucong and Gu, Shilin and Fan, Ruidong and Xu, Chao and Hou, Chenping},
  title     = {Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15392--15401}
}
A Physics-Informed Blur Learning Framework for Imaging Systems: Liqun Chen,

Yuxuan Li,

Jun Dai,

Jinwei Gu,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liqun and Li, Yuxuan and Dai, Jun and Gu, Jinwei and Xue, Tianfan},
  title     = {A Physics-Informed Blur Learning Framework for Imaging Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10913--10922}
}
Towards Practical Real-Time Neural Video Compression: Zhaoyang Jia,

Bin Li,

Jiahao Li,

Wenxuan Xie,

Linfeng Qi,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zhaoyang and Li, Bin and Li, Jiahao and Xie, Wenxuan and Qi, Linfeng and Li, Houqiang and Lu, Yan},
  title     = {Towards Practical Real-Time Neural Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12543--12552}
}
DepthSplat: Connecting Gaussian Splatting and Depth: Haofei Xu,

Songyou Peng,

Fangjinhua Wang,

Hermann Blum,

Daniel Barath,

Andreas Geiger,

Marc Pollefeys; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Haofei and Peng, Songyou and Wang, Fangjinhua and Blum, Hermann and Barath, Daniel and Geiger, Andreas and Pollefeys, Marc},
  title     = {{DepthSplat}: Connecting {Gaussian} Splatting and Depth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16453--16463}
}
Dynamic Camera Poses and Where to Find Them: Chris Rockwell,

Joseph Tung,

Tsung-Yi Lin,

Ming-Yu Liu,

David F. Fouhey,

Chen-Hsuan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rockwell_2025_CVPR,
  author    = {Rockwell, Chris and Tung, Joseph and Lin, Tsung-Yi and Liu, Ming-Yu and Fouhey, David F. and Lin, Chen-Hsuan},
  title     = {Dynamic Camera Poses and Where to Find Them},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12444--12455}
}
OmniGen: Unified Image Generation: Shitao Xiao,

Yueze Wang,

Junjie Zhou,

Huaying Yuan,

Xingrun Xing,

Ruiran Yan,

Chaofan Li,

Shuting Wang,

Tiejun Huang,

Zheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Shitao and Wang, Yueze and Zhou, Junjie and Yuan, Huaying and Xing, Xingrun and Yan, Ruiran and Li, Chaofan and Wang, Shuting and Huang, Tiejun and Liu, Zheng},
  title     = {{OmniGen}: Unified Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13294--13304}
}
QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers: Natacha Kuete Meli,

Vladislav Golyanik,

Marcel Seelbach Benkner,

Michael Moeller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meli_2025_CVPR,
  author    = {Meli, Natacha Kuete and Golyanik, Vladislav and Benkner, Marcel Seelbach and Moeller, Michael},
  title     = {{QuCOOP}: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11395--11405}
}
Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes: Kaiwei Zhang,

Dandan Zhu,

Xiongkuo Min,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kaiwei and Zhu, Dandan and Min, Xiongkuo and Zhai, Guangtao},
  title     = {Mesh {Mamba}: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16219--16228}
}
SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation: Leigang Qu,

Haochuan Li,

Wenjie Wang,

Xiang Liu,

Juncheng Li,

Liqiang Nie,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Leigang and Li, Haochuan and Wang, Wenjie and Liu, Xiang and Li, Juncheng and Nie, Liqiang and Chua, Tat-Seng},
  title     = {{SILMM}: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18497--18508}
}
Calibrated Multi-Preference Optimization for Aligning Diffusion Models: Kyungmin Lee,

Xiahong Li,

Qifei Wang,

Junfeng He,

Junjie Ke,

Ming-Hsuan Yang,

Irfan Essa,

Jinwoo Shin,

Feng Yang,

Yinxiao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Kyungmin and Li, Xiahong and Wang, Qifei and He, Junfeng and Ke, Junjie and Yang, Ming-Hsuan and Essa, Irfan and Shin, Jinwoo and Yang, Feng and Li, Yinxiao},
  title     = {Calibrated Multi-Preference Optimization for Aligning Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18465--18475}
}
Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack: Nicole Meng,

Caleb Manicke,

Ronak Sahu,

Caiwen Ding,

Yingjie Lao; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Nicole and Manicke, Caleb and Sahu, Ronak and Ding, Caiwen and Lao, Yingjie},
  title     = {Advancing Adversarial Robustness in {GNeRFs}: The {IL2-NeRF} Attack},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16388--16397}
}
PolarNeXt: Rethink Instance Segmentation with Polar Representation: Jiacheng Sun,

Xinghong Zhou,

Yiqiang Wu,

Bin Zhu,

Jiaxuan Lu,

Yu Qin,

Xiaomao Li; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Jiacheng and Zhou, Xinghong and Wu, Yiqiang and Zhu, Bin and Lu, Jiaxuan and Qin, Yu and Li, Xiaomao},
  title     = {{PolarNeXt}: Rethink Instance Segmentation with Polar Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19315--19324}
}
SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model: Chongkai Yu,

Ting Liu,

Anqi Li,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Chongkai and Liu, Ting and Li, Anqi and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin},
  title     = {{SAM-REF}: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the {Segment Anything Model}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19356--19365}
}
DarkIR: Robust Low-Light Image Restoration: Daniel Feijoo,

Juan C. Benito,

Alvaro Garcia,

Marcos V. Conde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feijoo_2025_CVPR,
  author    = {Feijoo, Daniel and Benito, Juan C. and Garcia, Alvaro and Conde, Marcos V.},
  title     = {{DarkIR}: Robust Low-Light Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10879--10889}
}
R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner: Ziyi Bai,

Hanxuan Li,

Bin Fu,

Chuyan Xiong,

Ruiping Wang,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Ziyi and Li, Hanxuan and Fu, Bin and Xiong, Chuyan and Wang, Ruiping and Chen, Xilin},
  title     = {{R2C}: Mapping Room to Chessboard to Unlock {LLM} As Low-Level Action Planner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19456--19466}
}
From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling: Jinhong Lin,

Cheng-En Wu,

Huanran Li,

Jifan Zhang,

Yu Hen Hu,

Pedro Morgado; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jinhong and Wu, Cheng-En and Li, Huanran and Zhang, Jifan and Hu, Yu Hen and Morgado, Pedro},
  title     = {From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20028--20038}
}
Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation: Hyunsoo Kim,

Donghyun Kim,

Suhyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hyunsoo and Kim, Donghyun and Kim, Suhyun},
  title     = {Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18250--18259}
}
MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting: Jun Huang,

Ting Liu,

Yihang Wu,

Xiaochao Qu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jun and Liu, Ting and Wu, Yihang and Qu, Xiaochao and Liu, Luoqi and Hu, Xiaolin},
  title     = {{MTADiffusion}: Mask Text Alignment Diffusion Model for Object Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18325--18334}
}
Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions: He Zhu,

Quyu Kong,

Kechun Xu,

Xunlong Xia,

Bing Deng,

Jieping Ye,

Rong Xiong,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, He and Kong, Quyu and Xu, Kechun and Xia, Xunlong and Deng, Bing and Ye, Jieping and Xiong, Rong and Wang, Yue},
  title     = {Grounding {3D} Object Affordance with Language Instructions, Visual Observations and Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17337--17346},
}
Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation: Pu Cao,

Feng Zhou,

Lu Yang,

Tianrui Huang,

Qing Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Pu and Zhou, Feng and Yang, Lu and Huang, Tianrui and Song, Qing},
  title     = {Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18358--18368},
}
Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization: You Shen,

Zhipeng Zhang,

Xinyang Li,

Yansong Qu,

Yu Lin,

Shengchuan Zhang,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, You and Zhang, Zhipeng and Li, Xinyang and Qu, Yansong and Lin, Yu and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16346--16355},
}
NoiseCtrl: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models: Longquan Dai,

He Wang,

Jinhui Tang; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Longquan and Wang, He and Tang, Jinhui},
  title     = {{NoiseCtrl}: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18093--18102},
}
KMD: Koopman Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities: Tianyi Liu,

Haochuan Jiang,

Kaizhu Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Tianyi and Jiang, Haochuan and Huang, Kaizhu},
  title     = {{KMD}: {Koopman} Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15663--15671},
}
DORNet: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution: Zhengxue Wang,

Zhiqiang Yan,

Jinshan Pan,

Guangwei Gao,

Kai Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhengxue and Yan, Zhiqiang and Pan, Jinshan and Gao, Guangwei and Zhang, Kai and Yang, Jian},
  title     = {{DORNet}: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15813--15822},
}
Fractal Calibration for Long-tailed Object Detection: Konstantinos Panagiotis Alexandridis,

Ismail Elezi,

Jiankang Deng,

Anh Nguyen,

Shan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alexandridis_2025_CVPR,
  author    = {Alexandridis, Konstantinos Panagiotis and Elezi, Ismail and Deng, Jiankang and Nguyen, Anh and Luo, Shan},
  title     = {Fractal Calibration for Long-tailed Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15139--15150},
}
M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings: Qingzheng Xu,

Ru Cao,

Xin Shen,

Heming Du,

Sen Wang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Qingzheng and Cao, Ru and Shen, Xin and Du, Heming and Wang, Sen and Yu, Xin},
  title     = {{M3GYM}: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12289--12300},
}
FedSPA: Generalizable Federated Graph Learning under Homophily Heterogeneity: Zihan Tan,

Guancheng Wan,

Wenke Huang,

He Li,

Guibin Zhang,

Carl Yang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Zhang, Guibin and Yang, Carl and Ye, Mang},
  title     = {{FedSPA}: Generalizable Federated Graph Learning under Homophily Heterogeneity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15464--15475},
}
GazeGene: Large-scale Synthetic Gaze Dataset with 3D Eyeball Annotations: Yiwei Bao,

Zhiming Wang,

Feng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Yiwei and Wang, Zhiming and Lu, Feng},
  title     = {{GazeGene}: Large-scale Synthetic Gaze Dataset with {3D} Eyeball Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18749--18759},
}
VideoHandles: Editing 3D Object Compositions in Videos Using Video Generative Priors: Juil Koo,

Paul Guerrero,

Chun-Hao P. Huang,

Duygu Ceylan,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2025_CVPR,
  author    = {Koo, Juil and Guerrero, Paul and Huang, Chun-Hao P. and Ceylan, Duygu and Sung, Minhyuk},
  title     = {{VideoHandles}: Editing {3D} Object Compositions in Videos Using Video Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17692--17701},
}
GaussTR: Foundation Model-Aligned Gaussian Transformer for Self-Supervised 3D Spatial Understanding: Haoyi Jiang,

Liu Liu,

Tianheng Cheng,

Xinjie Wang,

Tianwei Lin,

Zhizhong Su,

Wenyu Liu,

Xinggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haoyi and Liu, Liu and Cheng, Tianheng and Wang, Xinjie and Lin, Tianwei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang},
  title     = {{GaussTR}: Foundation Model-Aligned {Gaussian} Transformer for Self-Supervised {3D} Spatial Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11960--11970},
}
Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions: Stefan Andreas Baumann,

Felix Krause,

Michael Neumayr,

Nick Stracke,

Melvin Sevi,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baumann_2025_CVPR,
  author    = {Baumann, Stefan Andreas and Krause, Felix and Neumayr, Michael and Stracke, Nick and Sevi, Melvin and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {Continuous, Subject-Specific Attribute Control in {T2I} Models by Identifying Semantic Directions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13231--13241},
}
SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment: Katrin Renz,

Long Chen,

Elahe Arani,

Oleg Sinavski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Renz_2025_CVPR,
  author    = {Renz, Katrin and Chen, Long and Arani, Elahe and Sinavski, Oleg},
  title     = {{SimLingo}: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11993--12003},
}
Improved Video VAE for Latent Video Diffusion Model: Pingyu Wu,

Kai Zhu,

Yu Liu,

Liming Zhao,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Pingyu and Zhu, Kai and Liu, Yu and Zhao, Liming and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {Improved Video {VAE} for Latent Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18124--18133},
}
Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance: Mingjun Zheng,

Long Sun,

Jiangxin Dong,

Jinshan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Mingjun and Sun, Long and Dong, Jiangxin and Pan, Jinshan},
  title     = {Efficient Video Super-Resolution for Real-time Rendering with Decoupled {G-buffer} Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11328--11337},
}
Learned Image Compression with Dictionary-based Entropy Model: Jingbo Lu,

Leheng Zhang,

Xingyu Zhou,

Mu Li,

Wen Li,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jingbo and Zhang, Leheng and Zhou, Xingyu and Li, Mu and Li, Wen and Gu, Shuhang},
  title     = {Learned Image Compression with Dictionary-based Entropy Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12850--12859},
}
FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model: Jun Zhou,

Jiahao Li,

Zunnan Xu,

Hanhui Li,

Yiji Cheng,

Fa-Ting Hong,

Qin Lin,

Qinglin Lu,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jun and Li, Jiahao and Xu, Zunnan and Li, Hanhui and Cheng, Yiji and Hong, Fa-Ting and Lin, Qin and Lu, Qinglin and Liang, Xiaodan},
  title     = {{FireEdit}: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13093--13103},
}
DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal: Zhilv Yi,

Xiao Lu,

Hong Ding,

Jingbo Hu,

Zhi Jiang,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Zhilv and Lu, Xiao and Ding, Hong and Hu, Jingbo and Jiang, Zhi and Xiao, Chunxia},
  title     = {{DL2G}: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16061--16070},
}
MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting: Mengqiu Xu,

Kaixin Chen,

Heng Guo,

Yixiang Huang,

Ming Wu,

Zhenwei Shi,

Chuang Zhang,

Jun Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Mengqiu and Chen, Kaixin and Guo, Heng and Huang, Yixiang and Wu, Ming and Shi, Zhenwei and Zhang, Chuang and Guo, Jun},
  title     = {{MFogHub}: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12637--12646},
}
The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models: Naveen George,

Karthik Nandan Dasaraju,

Rutheesh Reddy Chittepu,

Konda Reddy Mopuri; [pdf] [supp]
[bibtex]
@InProceedings{George_2025_CVPR,
  author    = {George, Naveen and Dasaraju, Karthik Nandan and Chittepu, Rutheesh Reddy and Mopuri, Konda Reddy},
  title     = {The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13393--13402},
}
Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images: Junning Qiu,

Minglei Lu,

Fei Wang,

Yu Guo,

Yonggen Ling; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Junning and Lu, Minglei and Wang, Fei and Guo, Yu and Ling, Yonggen},
  title     = {Leveraging Global Stereo Consistency for Category-Level Shape and {6D} Pose Estimation from Stereo Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16839--16849},
}
AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting: Kenghong Lin,

Baoquan Zhang,

Demin Yu,

Wenzhi Feng,

Shidong Chen,

Feifan Gao,

Xutao Li,

Yunming Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kenghong and Zhang, Baoquan and Yu, Demin and Feng, Wenzhi and Chen, Shidong and Gao, Feifan and Li, Xutao and Ye, Yunming},
  title     = {{AlphaPre}: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17841--17850},
}
Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection: Houzhang Fang,

Xiaolin Wang,

Zengyang Li,

Lu Wang,

Qingshan Li,

Yi Chang,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Houzhang and Wang, Xiaolin and Li, Zengyang and Wang, Lu and Li, Qingshan and Chang, Yi and Yan, Luxin},
  title     = {Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared {UAV} Target Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11898--11907},
}
Articulated Kinematics Distillation from Video Diffusion Models: Xuan Li,

Qianli Ma,

Tsung-Yi Lin,

Yongxin Chen,

Chenfanfu Jiang,

Ming-Yu Liu,

Donglai Xiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xuan and Ma, Qianli and Lin, Tsung-Yi and Chen, Yongxin and Jiang, Chenfanfu and Liu, Ming-Yu and Xiang, Donglai},
  title     = {Articulated Kinematics Distillation from Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17571--17581},
}
ExpertAF: Expert Actionable Feedback from Video: Kumar Ashutosh,

Tushar Nagarajan,

Georgios Pavlakos,

Kris Kitani,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashutosh_2025_CVPR,
  author    = {Ashutosh, Kumar and Nagarajan, Tushar and Pavlakos, Georgios and Kitani, Kris and Grauman, Kristen},
  title     = {{ExpertAF}: Expert Actionable Feedback from Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13582--13594},
}
Volumetrically Consistent 3D Gaussian Rasterization: Chinmay Talegaonkar,

Yash Belhe,

Ravi Ramamoorthi,

Nicholas Antipa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talegaonkar_2025_CVPR,
  author    = {Talegaonkar, Chinmay and Belhe, Yash and Ramamoorthi, Ravi and Antipa, Nicholas},
  title     = {Volumetrically Consistent {3D} {Gaussian} Rasterization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10953--10963},
}
The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation: Marcus Nordström,

Atsuto Maki,

Henrik Hult; [pdf] [supp]
[bibtex]
@InProceedings{Nordstrom_2025_CVPR,
  author    = {Nordstr{\"o}m, Marcus and Maki, Atsuto and Hult, Henrik},
  title     = {The Impact Label Noise and Choice of Threshold has on Cross-Entropy and {Soft-Dice} in Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20820--20829},
}
LLaVA-Critic: Learning to Evaluate Multimodal Models: Tianyi Xiong,

Xiyao Wang,

Dong Guo,

Qinghao Ye,

Haoqi Fan,

Quanquan Gu,

Heng Huang,

Chunyuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Xiong_2025_CVPR,
  author    = {Xiong, Tianyi and Wang, Xiyao and Guo, Dong and Ye, Qinghao and Fan, Haoqi and Gu, Quanquan and Huang, Heng and Li, Chunyuan},
  title     = {{LLaVA-Critic}: Learning to Evaluate Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13618--13628},
}
VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge: Vishwesh Nath,

Wenqi Li,

Dong Yang,

Andriy Myronenko,

Mingxin Zheng,

Yao Lu,

Zhijian Liu,

Hongxu Yin,

Yee Man Law,

Yucheng Tang,

Pengfei Guo,

Can Zhao,

Ziyue Xu,

Yufan He,

Stephanie Harmon,

Benjamin Simon,

Greg Heinrich,

Stephen Aylward,

Marc Edgar,

Michael Zephyr,

Pavlo Molchanov,

Baris Turkbey,

Holger Roth,

Daguang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Nath_2025_CVPR,
  author    = {Nath, Vishwesh and Li, Wenqi and Yang, Dong and Myronenko, Andriy and Zheng, Mingxin and Lu, Yao and Liu, Zhijian and Yin, Hongxu and Law, Yee Man and Tang, Yucheng and Guo, Pengfei and Zhao, Can and Xu, Ziyue and He, Yufan and Harmon, Stephanie and Simon, Benjamin and Heinrich, Greg and Aylward, Stephen and Edgar, Marc and Zephyr, Michael and Molchanov, Pavlo and Turkbey, Baris and Roth, Holger and Xu, Daguang},
  title     = {{VILA-M3}: Enhancing Vision-Language Models with Medical Expert Knowledge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14788--14798},
}
Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation: Jingxi Chen,

Brandon Y. Feng,

Haoming Cai,

Tianfu Wang,

Levi Burner,

Dehao Yuan,

Cornelia Fermuller,

Christopher A. Metzler,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jingxi and Feng, Brandon Y. and Cai, Haoming and Wang, Tianfu and Burner, Levi and Yuan, Dehao and Fermuller, Cornelia and Metzler, Christopher A. and Aloimonos, Yiannis},
  title     = {Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12456--12466},
}
Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels: Jiyuan Liu,

Xinwang Liu,

Chuankun Li,

Xinhang Wan,

Hao Tan,

Yi Zhang,

Weixuan Liang,

Qian Qu,

Yu Feng,

Renxiang Guan,

Ke Liang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiyuan and Liu, Xinwang and Li, Chuankun and Wan, Xinhang and Tan, Hao and Zhang, Yi and Liang, Weixuan and Qu, Qian and Feng, Yu and Guan, Renxiang and Liang, Ke},
  title     = {Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20727--20736},
}
Q-Eval-100K: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content: Zicheng Zhang,

Tengchuan Kou,

Shushi Wang,

Chunyi Li,

Wei Sun,

Wei Wang,

Xiaoyu Li,

Zongyu Wang,

Xuezhi Cao,

Xiongkuo Min,

Xiaohong Liu,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zicheng and Kou, Tengchuan and Wang, Shushi and Li, Chunyi and Sun, Wei and Wang, Wei and Li, Xiaoyu and Wang, Zongyu and Cao, Xuezhi and Min, Xiongkuo and Liu, Xiaohong and Zhai, Guangtao},
  title     = {{Q-Eval-100K}: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10621--10631},
}
Dual Focus-Attention Transformer for Robust Point Cloud Registration: Kexue Fu,

Mingzhi Yuan,

Changwei Wang,

Weiguang Pang,

Jing Chi,

Manning Wang,

Longxiang Gao; [pdf]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Kexue and Yuan, Mingzhi and Wang, Changwei and Pang, Weiguang and Chi, Jing and Wang, Manning and Gao, Longxiang},
  title     = {Dual Focus-Attention Transformer for Robust Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11769--11778},
}
Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions: Tianhao Ma,

Han Chen,

Juncheng Hu,

Yungang Zhu,

Ximing Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Tianhao and Chen, Han and Hu, Juncheng and Zhu, Yungang and Li, Ximing},
  title     = {Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20592--20601},
}
Progress-Aware Video Frame Captioning: Zihui Xue,

Joungbin An,

Xitong Yang,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Zihui and An, Joungbin and Yang, Xitong and Grauman, Kristen},
  title     = {Progress-Aware Video Frame Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13639--13650},
}
SMTPD: A New Benchmark for Temporal Prediction of Social Media Popularity: Yijie Xu,

Bolun Zheng,

Wei Zhu,

Hangjia Pan,

Yuchen Yao,

Ning Xu,

Anan Liu,

Quan Zhang,

Chenggang Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yijie and Zheng, Bolun and Zhu, Wei and Pan, Hangjia and Yao, Yuchen and Xu, Ning and Liu, Anan and Zhang, Quan and Yan, Chenggang},
  title     = {{SMTPD}: A New Benchmark for Temporal Prediction of Social Media Popularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18847--18857},
}
Learning on Model Weights using Tree Experts: Eliahu Horwitz,

Bar Cavia,

Jonathan Kahana,

Yedid Hoshen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Horwitz_2025_CVPR,
  author    = {Horwitz, Eliahu and Cavia, Bar and Kahana, Jonathan and Hoshen, Yedid},
  title     = {Learning on Model Weights using Tree Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20468--20478},
}
Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays: Shashwath Bharadwaj,

Ruangrawee Kitichotkul,

Akshay Agarwal,

Vivek K Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bharadwaj_2025_CVPR,
  author    = {Bharadwaj, Shashwath and Kitichotkul, Ruangrawee and Agarwal, Akshay and Goyal, Vivek K},
  title     = {Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11406--11415},
}
Towards Transformer-Based Aligned Generation with Self-Coherence Guidance: Shulei Wang,

Wang Lin,

Hai Huang,

Hanting Wang,

Sihang Cai,

WenKang Han,

Tao Jin,

Jingyuan Chen,

Jiacheng Sun,

Jieming Zhu,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shulei and Lin, Wang and Huang, Hai and Wang, Hanting and Cai, Sihang and Han, WenKang and Jin, Tao and Chen, Jingyuan and Sun, Jiacheng and Zhu, Jieming and Zhao, Zhou},
  title     = {Towards Transformer-Based Aligned Generation with Self-Coherence Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18455--18464},
}
Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation: Andrea Maracani,

Savas Ozkan,

Sijun Cho,

Hyowon Kim,

Eunchung Noh,

Jeongwon Min,

Cho Jung Min,

Dookun Park,

Mete Ozay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maracani_2025_CVPR,
  author    = {Maracani, Andrea and Ozkan, Savas and Cho, Sijun and Kim, Hyowon and Noh, Eunchung and Min, Jeongwon and Min, Cho Jung and Park, Dookun and Ozay, Mete},
  title     = {Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14516--14526},
}
DART: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation: Sang-Jun Park,

Keun-Soo Heo,

Dong-Hee Shin,

Young-Han Son,

Ji-Hye Oh,

Tae-Eui Kam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Sang-Jun and Heo, Keun-Soo and Shin, Dong-Hee and Son, Young-Han and Oh, Ji-Hye and Kam, Tae-Eui},
  title     = {{DART}: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15580--15589},
}
On the Consistency of Video Large Language Models in Temporal Comprehension: Minjoon Jung,

Junbin Xiao,

Byoung-Tak Zhang,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Minjoon and Xiao, Junbin and Zhang, Byoung-Tak and Yao, Angela},
  title     = {On the Consistency of Video Large Language Models in Temporal Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13713--13722},
}
Less is More: Efficient Model Merging with Binary Task Switch: Biqing Qi,

Fangyuan Li,

Zhen Wang,

Junqi Gao,

Dong Li,

Peng Ye,

Bowen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Biqing and Li, Fangyuan and Wang, Zhen and Gao, Junqi and Li, Dong and Ye, Peng and Zhou, Bowen},
  title     = {Less is More: Efficient Model Merging with Binary Task Switch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15265--15274},
}
One-Minute Video Generation with Test-Time Training: Karan Dalal,

Daniel Koceja,

Jiarui Xu,

Yue Zhao,

Shihao Han,

Ka Chun Cheung,

Jan Kautz,

Yejin Choi,

Yu Sun,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dalal_2025_CVPR,
  author    = {Dalal, Karan and Koceja, Daniel and Xu, Jiarui and Zhao, Yue and Han, Shihao and Cheung, Ka Chun and Kautz, Jan and Choi, Yejin and Sun, Yu and Wang, Xiaolong},
  title     = {One-Minute Video Generation with Test-Time Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17702--17711},
}
InteractionMap: Improving Online Vectorized HDMap Construction with Interaction: Kuang Wu,

Chuan Yang,

Zhanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kuang and Yang, Chuan and Li, Zhanbin},
  title     = {{InteractionMap}: Improving Online Vectorized {HDMap} Construction with Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17176--17186},
}
ROCKET-1: Mastering Open-World Interaction with Visual-Temporal Context Prompting: Shaofei Cai,

Zihao Wang,

Kewei Lian,

Zhancun Mu,

Xiaojian Ma,

Anji Liu,

Yitao Liang; [pdf]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Shaofei and Wang, Zihao and Lian, Kewei and Mu, Zhancun and Ma, Xiaojian and Liu, Anji and Liang, Yitao},
  title     = {{ROCKET-1}: Mastering Open-World Interaction with Visual-Temporal Context Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12122--12131},
}
RLAIF-V: Open-Source AI Feedback Leads to Super GPT-4V Trustworthiness: Tianyu Yu,

Haoye Zhang,

Qiming Li,

Qixin Xu,

Yuan Yao,

Da Chen,

Xiaoman Lu,

Ganqu Cui,

Yunkai Dang,

Taiwen He,

Xiaocheng Feng,

Jun Song,

Bo Zheng,

Zhiyuan Liu,

Tat-Seng Chua,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Tianyu and Zhang, Haoye and Li, Qiming and Xu, Qixin and Yao, Yuan and Chen, Da and Lu, Xiaoman and Cui, Ganqu and Dang, Yunkai and He, Taiwen and Feng, Xiaocheng and Song, Jun and Zheng, Bo and Liu, Zhiyuan and Chua, Tat-Seng and Sun, Maosong},
  title     = {{RLAIF-V}: Open-Source {AI} Feedback Leads to Super {GPT-4V} Trustworthiness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19985--19995},
}
EditSplat: Multi-View Fusion and Attention-Guided Optimization for View-Consistent 3D Scene Editing with 3D Gaussian Splatting: Dong In Lee,

Hyeongcheol Park,

Jiyoung Seo,

Eunbyung Park,

Hyunje Park,

Ha Dam Baek,

Sangheon Shin,

Sangmin Kim,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dong In and Park, Hyeongcheol and Seo, Jiyoung and Park, Eunbyung and Park, Hyunje and Baek, Ha Dam and Shin, Sangheon and Kim, Sangmin and Kim, Sangpil},
  title     = {{EditSplat}: Multi-View Fusion and Attention-Guided Optimization for View-Consistent {3D} Scene Editing with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11135--11145},
}
One-shot 3D Object Canonicalization based on Geometric and Semantic Consistency: Li Jin,

Yujie Wang,

Wenzheng Chen,

Qiyu Dai,

Qingzhe Gao,

Xueying Qin,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Li and Wang, Yujie and Chen, Wenzheng and Dai, Qiyu and Gao, Qingzhe and Qin, Xueying and Chen, Baoquan},
  title     = {One-shot {3D} Object Canonicalization based on Geometric and Semantic Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16850--16859},
}
Inst3D-LMM: Instance-Aware 3D Scene Understanding with Multi-modal Instruction Tuning: Hanxun Yu,

Wentong Li,

Song Wang,

Junbo Chen,

Jianke Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hanxun and Li, Wentong and Wang, Song and Chen, Junbo and Zhu, Jianke},
  title     = {{Inst3D-LMM}: Instance-Aware {3D} Scene Understanding with Multi-modal Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14147--14157},
}
ProHOC: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks: Erik Wallin,

Fredrik Kahl,

Lars Hammarstrand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wallin_2025_CVPR,
  author    = {Wallin, Erik and Kahl, Fredrik and Hammarstrand, Lars},
  title     = {{ProHOC}: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20612--20621},
}
CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP: Songlong Xing,

Zhengyu Zhao,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu},
  title     = {{CLIP} is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15172--15182},
}
Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection: Ruiheng Liu,

Haozhe Chen,

Boyao Zhao,

Kejiang Chen,

Weiming Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ruiheng and Chen, Haozhe and Zhao, Boyao and Chen, Kejiang and Zhang, Weiming},
  title     = {Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20169--20178}
}
Interleaved-Modal Chain-of-Thought: Jun Gao,

Yongqi Li,

Ziqiang Cao,

Wenjie Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jun and Li, Yongqi and Cao, Ziqiang and Li, Wenjie},
  title     = {Interleaved-Modal Chain-of-Thought},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19520--19529}
}
Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training: Shixin Li,

Chaoxiang He,

Xiaojing Ma,

Bin Benjamin Zhu,

Shuo Wang,

Hongsheng Hu,

Dongmei Zhang,

Linchen Yu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shixin and He, Chaoxiang and Ma, Xiaojing and Zhu, Bin Benjamin and Wang, Shuo and Hu, Hongsheng and Zhang, Dongmei and Yu, Linchen},
  title     = {Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20685--20694}
}
O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models: Ashshak Sharifdeen,

Muhammad Akhtar Munir,

Sanoojan Baliah,

Salman Khan,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Sharifdeen_2025_CVPR,
  author    = {Sharifdeen, Ashshak and Munir, Muhammad Akhtar and Baliah, Sanoojan and Khan, Salman and Khan, Muhammad Haris},
  title     = {{O-TPT}: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19942--19951}
}
Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation: Zhuoran Zhao,

Linlin Yang,

Pengzhan Sun,

Pan Hui,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zhuoran and Yang, Linlin and Sun, Pengzhan and Hui, Pan and Yao, Angela},
  title     = {Analyzing the Synthetic-to-Real Domain Gap in {3D} Hand Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12255--12265}
}
Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields: Shijie Zhou,

Hui Ren,

Yijia Weng,

Shuwang Zhang,

Zhen Wang,

Dejia Xu,

Zhiwen Fan,

Suya You,

Zhangyang Wang,

Leonidas Guibas,

Achuta Kadambi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shijie and Ren, Hui and Weng, Yijia and Zhang, Shuwang and Wang, Zhen and Xu, Dejia and Fan, Zhiwen and You, Suya and Wang, Zhangyang and Guibas, Leonidas and Kadambi, Achuta},
  title     = {{Feature4X}: Bridging Any Monocular Video to {4D} Agentic {AI} with Versatile {Gaussian} Feature Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14179--14190}
}
Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance: Jin-Liang Xiao,

Ting-Zhu Huang,

Liang-Jian Deng,

Guang Lin,

Zihan Cao,

Chao Li,

Qibin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Jin-Liang and Huang, Ting-Zhu and Deng, Liang-Jian and Lin, Guang and Cao, Zihan and Li, Chao and Zhao, Qibin},
  title     = {Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12669--12678}
}
EASEMVC:Efficient Dual Selection Mechanism for Deep Multi-View Clustering: Baili Xiao,

Zhibin Dong,

Ke Liang,

Suyuan Liu,

Siwei Wang,

Tianrui Liu,

Xingchen Hu,

En Zhu,

Xinwang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Baili and Dong, Zhibin and Liang, Ke and Liu, Suyuan and Wang, Siwei and Liu, Tianrui and Hu, Xingchen and Zhu, En and Liu, Xinwang},
  title     = {{EASEMVC}: Efficient Dual Selection Mechanism for Deep Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20716--20726}
}
DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering: Jingzhou Luo,

Yang Liu,

Weixing Chen,

Zhen Li,

Yaowei Wang,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Jingzhou and Liu, Yang and Chen, Weixing and Li, Zhen and Wang, Yaowei and Li, Guanbin and Lin, Liang},
  title     = {{DSPNet}: Dual-vision Scene Perception for Robust {3D} Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14169--14178}
}
IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior: Jingyi Xu,

Siwei Tu,

Weidong Yang,

Ben Fei,

Shuhao Li,

Keyi Liu,

Yeqi Luo,

Lipeng Ma,

Lei Bai; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jingyi and Tu, Siwei and Yang, Weidong and Fei, Ben and Li, Shuhao and Liu, Keyi and Luo, Yeqi and Ma, Lipeng and Bai, Lei},
  title     = {{IceDiff}: High Resolution and High-Quality {Arctic} Sea Ice Forecasting with Generative Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10567--10576}
}
DTOS: Dynamic Time Object Sensing with Large Multimodal Model: Jirui Tian,

Jinrong Zhang,

Shenglan Liu,

Luhao Xu,

Zhixiong Huang,

Gao Huang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Jirui and Zhang, Jinrong and Liu, Shenglan and Xu, Luhao and Huang, Zhixiong and Huang, Gao},
  title     = {{DTOS}: Dynamic Time Object Sensing with Large Multimodal Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13810--13820}
}
How to Merge Your Multimodal Models Over Time?: Sebastian Dziadzio,

Vishaal Udandarao,

Karsten Roth,

Ameya Prabhu,

Zeynep Akata,

Samuel Albanie,

Matthias Bethge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dziadzio_2025_CVPR,
  author    = {Dziadzio, Sebastian and Udandarao, Vishaal and Roth, Karsten and Prabhu, Ameya and Akata, Zeynep and Albanie, Samuel and Bethge, Matthias},
  title     = {How to Merge Your Multimodal Models Over Time?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20479--20491}
}
Identifying and Mitigating Position Bias of Multi-image Vision-Language Models: Xinyu Tian,

Shu Zou,

Zhaoyuan Yang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and Zhang, Jing},
  title     = {Identifying and Mitigating Position Bias of Multi-image Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10599--10609}
}
Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation: Zhiwei Yang,

Yucong Meng,

Kexue Fu,

Feilong Tang,

Shuo Wang,

Zhijian Song; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhiwei and Meng, Yucong and Fu, Kexue and Tang, Feilong and Wang, Shuo and Song, Zhijian},
  title     = {Exploring {CLIP}'s Dense Knowledge for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20223--20232}
}
ShowUI: One Vision-Language-Action Model for GUI Visual Agent: Kevin Qinghong Lin,

Linjie Li,

Difei Gao,

Zhengyuan Yang,

Shiwei Wu,

Zechen Bai,

Stan Weixian Lei,

Lijuan Wang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kevin Qinghong and Li, Linjie and Gao, Difei and Yang, Zhengyuan and Wu, Shiwei and Bai, Zechen and Lei, Stan Weixian and Wang, Lijuan and Shou, Mike Zheng},
  title     = {{ShowUI}: One Vision-Language-Action Model for {GUI} Visual Agent},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19498--19508}
}
Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis: Jian Han,

Jinlai Liu,

Yi Jiang,

Bin Yan,

Yuqi Zhang,

Zehuan Yuan,

Bingyue Peng,

Xiaobing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Jian and Liu, Jinlai and Jiang, Yi and Yan, Bin and Zhang, Yuqi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing},
  title     = {Infinity: Scaling Bitwise {AutoRegressive} Modeling for High-Resolution Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15733--15744}
}
HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation: Boyuan Wang,

Xiaofeng Wang,

Chaojun Ni,

Guosheng Zhao,

Zhiqin Yang,

Zheng Zhu,

Muyang Zhang,

Yukun Zhou,

Xinze Chen,

Guan Huang,

Lihong Liu,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Boyuan and Wang, Xiaofeng and Ni, Chaojun and Zhao, Guosheng and Yang, Zhiqin and Zhu, Zheng and Zhang, Muyang and Zhou, Yukun and Chen, Xinze and Huang, Guan and Liu, Lihong and Wang, Xingang},
  title     = {{HumanDreamer}: Generating Controllable Human-Motion Videos via Decoupled Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12391--12401}
}
ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos: Tanveer Hannan,

Md Mohaiminul Islam,

Jindong Gu,

Thomas Seidl,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hannan_2025_CVPR,
  author    = {Hannan, Tanveer and Islam, Md Mohaiminul and Gu, Jindong and Seidl, Thomas and Bertasius, Gedas},
  title     = {{ReVisionLLM}: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19012--19022}
}
ArtiFade: Learning to Generate High-quality Subject from Blemished Images: Shuya Yang,

Shaozhe Hao,

Yukang Cao,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shuya and Hao, Shaozhe and Cao, Yukang and Wong, Kwan-Yee K.},
  title     = {{ArtiFade}: Learning to Generate High-quality Subject from Blemished Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13167--13177}
}
Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation: Haotong Lin,

Sida Peng,

Jingxiao Chen,

Songyou Peng,

Jiaming Sun,

Minghuan Liu,

Hujun Bao,

Jiashi Feng,

Xiaowei Zhou,

Bingyi Kang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Haotong and Peng, Sida and Chen, Jingxiao and Peng, Songyou and Sun, Jiaming and Liu, Minghuan and Bao, Hujun and Feng, Jiashi and Zhou, Xiaowei and Kang, Bingyi},
  title     = {Prompting {Depth Anything} for {4K} Resolution Accurate Metric Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17070--17080}
}
GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery: Enguang Wang,

Zhimao Peng,

Zhengyuan Xie,

Fei Yang,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Enguang and Peng, Zhimao and Xie, Zhengyuan and Yang, Fei and Liu, Xialei and Cheng, Ming-Ming},
  title     = {{GET}: Unlocking the Multi-modal Potential of {CLIP} for Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20296--20306}
}
Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation: Xingguo Lv,

Xingbo Dong,

Liwen Wang,

Jiewen Yang,

Lei Zhao,

Bin Pu,

Zhe Jin,

Xuejun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Xingguo and Dong, Xingbo and Wang, Liwen and Yang, Jiewen and Zhao, Lei and Pu, Bin and Jin, Zhe and Li, Xuejun},
  title     = {Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15621--15631}
}
DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders: Sizai Hou,

Songze Li,

Duanyi Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Sizai and Li, Songze and Yao, Duanyi},
  title     = {{DeDe}: Detecting Backdoor Samples for {SSL} Encoders via Decoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20675--20684}
}
Towards Scalable Human-aligned Benchmark for Text-guided Image Editing: Suho Ryu,

Kihyun Kim,

Eugene Baek,

Dongsoo Shin,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ryu_2025_CVPR,
  author    = {Ryu, Suho and Kim, Kihyun and Baek, Eugene and Shin, Dongsoo and Lee, Joonseok},
  title     = {Towards Scalable Human-aligned Benchmark for Text-guided Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18292--18301}
}
Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models: Zichen Miao,

Wei Chen,

Qiang Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Zichen and Chen, Wei and Qiu, Qiang},
  title     = {{Coeff-Tuning}: A Graph Filter Subspace View for Tuning Attention-Based Large Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20146--20157}
}
Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency: Alan Baade,

Changan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Baade_2025_CVPR,
  author    = {Baade, Alan and Chen, Changan},
  title     = {Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16753--16763}
}
CryptoFace: End-to-End Encrypted Face Recognition: Wei Ao,

Vishnu Naresh Boddeti; [pdf]
[bibtex]
@InProceedings{Ao_2025_CVPR,
  author    = {Ao, Wei and Boddeti, Vishnu Naresh},
  title     = {{CryptoFace}: End-to-End Encrypted Face Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19197--19206}
}
Relation-Rich Visual Document Generator for Visual Information Extraction: Zi-Han Jiang,

Chien-Wei Lin,

Wei-Hua Li,

Hsuan-Tung Liu,

Yi-Ren Yeh,

Chu-Song Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zi-Han and Lin, Chien-Wei and Li, Wei-Hua and Liu, Hsuan-Tung and Yeh, Yi-Ren and Chen, Chu-Song},
  title     = {Relation-Rich Visual Document Generator for Visual Information Extraction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14449--14459}
}
PromptHash:Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval: Qiang Zou,

Shuli Cheng,

Jiayi Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Qiang and Cheng, Shuli and Chen, Jiayi},
  title     = {{PromptHash}: Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19649--19658}
}
Universal Scene Graph Generation: Shengqiong Wu,

Hao Fei,

Tat-seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shengqiong and Fei, Hao and Chua, Tat-seng},
  title     = {Universal Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14158--14168}
}
Split Adaptation for Pre-trained Vision Transformers: Lixu Wang,

Bingqi Shang,

Yi Li,

Payal Mohapatra,

Wei Dong,

Xiao Wang,

Qi Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lixu and Shang, Bingqi and Li, Yi and Mohapatra, Payal and Dong, Wei and Wang, Xiao and Zhu, Qi},
  title     = {Split Adaptation for Pre-trained Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20092--20102}
}
SpatialLLM: A Compound 3D-Informed Design towards Spatially-Intelligent Large Multimodal Models: Wufei Ma,

Luoxin Ye,

Celso M de Melo,

Alan Yuille,

Jieneng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Wufei and Ye, Luoxin and de Melo, Celso M and Yuille, Alan and Chen, Jieneng},
  title     = {{SpatialLLM}: A Compound {3D}-Informed Design towards Spatially-Intelligent Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17249--17260}
}
Learning Occlusion-Robust Vision Transformers for Real-Time UAV Tracking: You Wu,

Xucheng Wang,

Xiangyang Yang,

Mengyuan Liu,

Dan Zeng,

Hengzhou Ye,

Shuiwang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, You and Wang, Xucheng and Yang, Xiangyang and Liu, Mengyuan and Zeng, Dan and Ye, Hengzhou and Li, Shuiwang},
  title     = {Learning Occlusion-Robust Vision Transformers for Real-Time {UAV} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17103--17113}
}
Plug-and-Play Versatile Compressed Video Enhancement: Huimin Zeng,

Jiacheng Li,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Huimin and Li, Jiacheng and Xiong, Zhiwei},
  title     = {Plug-and-Play Versatile Compressed Video Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17767--17777}
}
UltraFusion: Ultra High Dynamic Imaging using Exposure Fusion: Zixuan Chen,

Yujin Wang,

Xin Cai,

Zhiyuan You,

Zheming Lu,

Fan Zhang,

Shi Guo,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Wang, Yujin and Cai, Xin and You, Zhiyuan and Lu, Zheming and Zhang, Fan and Guo, Shi and Xue, Tianfan},
  title     = {{UltraFusion}: Ultra High Dynamic Imaging using Exposure Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16111--16121}
}
Noise-Resistant Video Anomaly Detection via RGB Error-Guided Multiscale Predictive Coding and Dynamic Memory: Han Hu,

Wenli Du,

Peng Liao,

Bing Wang,

Siyuan Fan; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Han and Du, Wenli and Liao, Peng and Wang, Bing and Fan, Siyuan},
  title     = {Noise-Resistant Video Anomaly Detection via {RGB} Error-Guided Multiscale Predictive Coding and Dynamic Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19109--19119}
}
GroupMamba: Efficient Group-Based Visual State Space Model: Abdelrahman Shaker,

Syed Talal Wasim,

Salman Khan,

Juergen Gall,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaker_2025_CVPR,
  author    = {Shaker, Abdelrahman and Wasim, Syed Talal and Khan, Salman and Gall, Juergen and Khan, Fahad Shahbaz},
  title     = {{GroupMamba}: Efficient Group-Based Visual State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14912--14922}
}
Escaping Plato's Cave: Towards the Alignment of 3D and Text Latent Spaces: Souhail Hadgi,

Luca Moschella,

Andrea Santilli,

Diego Gomez,

Qixing Huang,

Emanuele Rodolà,

Simone Melzi,

Maks Ovsjanikov; [pdf] [supp]
[bibtex]
@InProceedings{Hadgi_2025_CVPR,
  author    = {Hadgi, Souhail and Moschella, Luca and Santilli, Andrea and Gomez, Diego and Huang, Qixing and Rodol{\`a}, Emanuele and Melzi, Simone and Ovsjanikov, Maks},
  title     = {Escaping {Plato}'s Cave: Towards the Alignment of {3D} and Text Latent Spaces},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19825--19835}
}
ActiveGAMER: Active GAussian Mapping through Efficient Rendering: Liyan Chen,

Huangying Zhan,

Kevin Chen,

Xiangyu Xu,

Qingan Yan,

Changjiang Cai,

Yi Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liyan and Zhan, Huangying and Chen, Kevin and Xu, Xiangyu and Yan, Qingan and Cai, Changjiang and Xu, Yi},
  title     = {{ActiveGAMER}: Active {GAussian} Mapping through Efficient Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16486--16497}
}
Positive2Negative: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising: Tong Li,

Lizhi Wang,

Zhiyuan Xu,

Lin Zhu,

Wanxuan Lu,

Hua Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Tong and Wang, Lizhi and Xu, Zhiyuan and Zhu, Lin and Lu, Wanxuan and Huang, Hua},
  title     = {{Positive2Negative}: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17924--17934}
}
SeaLion: Semantic Part-Aware Latent Point Diffusion Models for 3D Generation: Dekai Zhu,

Yan Di,

Stefan Gavranovic,

Slobodan Ilic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Dekai and Di, Yan and Gavranovic, Stefan and Ilic, Slobodan},
  title     = {{SeaLion}: Semantic Part-Aware Latent Point Diffusion Models for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11789--11798}
}
Toward Real-world BEV Perception: Depth Uncertainty Estimation via Gaussian Splatting: Shu-Wei Lu,

Yi-Hsuan Tsai,

Yi-Ting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting},
  title     = {Toward Real-world {BEV} Perception: Depth Uncertainty Estimation via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17124--17133}
}
Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models: Reza Shirkavand,

Peiran Yu,

Shangqian Gao,

Gowthami Somepalli,

Tom Goldstein,

Heng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shirkavand_2025_CVPR,
  author    = {Shirkavand, Reza and Yu, Peiran and Gao, Shangqian and Somepalli, Gowthami and Goldstein, Tom and Huang, Heng},
  title     = {Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18619--18629}
}
WildGS-SLAM: Monocular Gaussian Splatting SLAM in Dynamic Environments: Jianhao Zheng,

Zihan Zhu,

Valentin Bieri,

Marc Pollefeys,

Songyou Peng,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Jianhao and Zhu, Zihan and Bieri, Valentin and Pollefeys, Marc and Peng, Songyou and Armeni, Iro},
  title     = {{WildGS-SLAM}: Monocular {Gaussian} Splatting {SLAM} in Dynamic Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11461--11471}
}
RePerformer: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance: Yuheng Jiang,

Zhehao Shen,

Chengcheng Guo,

Yu Hong,

Zhuo Su,

Yingliang Zhang,

Marc Habermann,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuheng and Shen, Zhehao and Guo, Chengcheng and Hong, Yu and Su, Zhuo and Zhang, Yingliang and Habermann, Marc and Xu, Lan},
  title     = {{RePerformer}: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11349--11360}
}
CheXWorld: Exploring Image World Modeling for Radiograph Representation Learning: Yang Yue,

Yulin Wang,

Chenxin Tao,

Pan Liu,

Shiji Song,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Yang and Wang, Yulin and Tao, Chenxin and Liu, Pan and Song, Shiji and Huang, Gao},
  title     = {{CheXWorld}: Exploring Image World Modeling for Radiograph Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20778--20788}
}
Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method: Xinshuai Song,

Weixing Chen,

Yang Liu,

Weikai Chen,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Xinshuai and Chen, Weixing and Liu, Yang and Chen, Weikai and Li, Guanbin and Lin, Liang},
  title     = {Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12078--12088}
}
AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM: Jiarui Wang,

Huiyu Duan,

Guangtao Zhai,

Juntong Wang,

Xiongkuo Min; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jiarui and Duan, Huiyu and Zhai, Guangtao and Wang, Juntong and Min, Xiongkuo},
  title     = {{AIGV-Assessor}: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with {LMM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18869--18880}
}
Autoregressive Distillation of Diffusion Transformers: Yeongmin Kim,

Sotiris Anagnostidis,

Yuming Du,

Edgar Schönfeld,

Jonas Kohler,

Markos Georgopoulos,

Albert Pumarola,

Ali Thabet,

Artsiom Sanakoyeu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Yeongmin and Anagnostidis, Sotiris and Du, Yuming and Sch{\"o}nfeld, Edgar and Kohler, Jonas and Georgopoulos, Markos and Pumarola, Albert and Thabet, Ali and Sanakoyeu, Artsiom},
  title     = {Autoregressive Distillation of Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15745--15756}
}
OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints: Mingjie Pan,

Jiyao Zhang,

Tianshu Wu,

Yinghao Zhao,

Wenlong Gao,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Mingjie and Zhang, Jiyao and Wu, Tianshu and Zhao, Yinghao and Gao, Wenlong and Dong, Hao},
  title     = {{OmniManip}: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17359--17369}
}
DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval: Leqi Shen,

Guoqiang Gong,

Tianxiang Hao,

Tao He,

Yifeng Zhang,

Pengzhang Liu,

Sicheng Zhao,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Leqi and Gong, Guoqiang and Hao, Tianxiang and He, Tao and Zhang, Yifeng and Liu, Pengzhang and Zhao, Sicheng and Han, Jungong and Ding, Guiguang},
  title     = {{DiscoVLA}: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19702--19712}
}
Visual-Instructed Degradation Diffusion for All-in-One Image Restoration: Wenyang Luo,

Haina Qin,

Zewen Chen,

Libin Wang,

Dandan Zheng,

Yuming Li,

Yufan Liu,

Bing Li,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Wenyang and Qin, Haina and Chen, Zewen and Wang, Libin and Zheng, Dandan and Li, Yuming and Liu, Yufan and Li, Bing and Hu, Weiming},
  title     = {Visual-Instructed Degradation Diffusion for All-in-One Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12764--12777}
}
Insightful Instance Features for 3D Instance Segmentation: Wonseok Roh,

Hwanhee Jung,

Giljoo Nam,

Dong In Lee,

Hyeongcheol Park,

Sang Ho Yoon,

Jungseock Joo,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Roh_2025_CVPR,
  author    = {Roh, Wonseok and Jung, Hwanhee and Nam, Giljoo and Lee, Dong In and Park, Hyeongcheol and Yoon, Sang Ho and Joo, Jungseock and Kim, Sangpil},
  title     = {Insightful Instance Features for {3D} Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14057--14067}
}
EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing: Gaoxiang Cong,

Jiadong Pan,

Liang Li,

Yuankai Qi,

Yuxin Peng,

Anton van den Hengel,

Jian Yang,

Qingming Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cong_2025_CVPR,
  author    = {Cong, Gaoxiang and Pan, Jiadong and Li, Liang and Qi, Yuankai and Peng, Yuxin and van den Hengel, Anton and Yang, Jian and Huang, Qingming},
  title     = {{EmoDubber}: Towards High Quality and Emotion Controllable Movie Dubbing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15863--15873}
}
A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering: Zheming Xu,

He Liu,

Congyan Lang,

Tao Wang,

Yidong Li,

Michael C. Kampffmeyer; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zheming and Liu, He and Lang, Congyan and Wang, Tao and Li, Yidong and Kampffmeyer, Michael C.},
  title     = {A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15528--15537}
}
Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation: Qi Lv,

Hao Li,

Xiang Deng,

Rui Shao,

Yinchuan Li,

Jianye Hao,

Longxiang Gao,

Michael Yu Wang,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
    author    = {Lv, Qi and Li, Hao and Deng, Xiang and Shao, Rui and Li, Yinchuan and Hao, Jianye and Gao, Longxiang and Wang, Michael Yu and Nie, Liqiang},
    title     = {Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17394--17404}
}
ZeroVO: Visual Odometry with Minimal Assumptions: Lei Lai,

Zekai Yin,

Eshed Ohn-Bar; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
    author    = {Lai, Lei and Yin, Zekai and Ohn-Bar, Eshed},
    title     = {{ZeroVO}: Visual Odometry with Minimal Assumptions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17092--17102}
}
VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM: Yuqian Yuan,

Hang Zhang,

Wentong Li,

Zesen Cheng,

Boqiang Zhang,

Long Li,

Xin Li,

Deli Zhao,

Wenqiao Zhang,

Yueting Zhuang,

Jianke Zhu,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
    author    = {Yuan, Yuqian and Zhang, Hang and Li, Wentong and Cheng, Zesen and Zhang, Boqiang and Li, Long and Li, Xin and Zhao, Deli and Zhang, Wenqiao and Zhuang, Yueting and Zhu, Jianke and Bing, Lidong},
    title     = {{VideoRefer} Suite: Advancing Spatial-Temporal Object Understanding with Video {LLM}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18970--18980}
}
HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding: Chenxin Tao,

Shiqian Su,

Xizhou Zhu,

Chenyu Zhang,

Zhe Chen,

Jiawen Liu,

Wenhai Wang,

Lewei Lu,

Gao Huang,

Yu Qiao,

Jifeng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_CVPR,
    author    = {Tao, Chenxin and Su, Shiqian and Zhu, Xizhou and Zhang, Chenyu and Chen, Zhe and Liu, Jiawen and Wang, Wenhai and Lu, Lewei and Huang, Gao and Qiao, Yu and Dai, Jifeng},
    title     = {{HoVLE}: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14559--14569}
}
Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects: Shalini Maiti,

Lourdes Agapito,

Filippos Kokkinos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maiti_2025_CVPR,
    author    = {Maiti, Shalini and Agapito, Lourdes and Kokkinos, Filippos},
    title     = {{Gen3DEval}: Using {vLLMs} for Automatic Evaluation of Generated {3D} Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18552--18562}
}
SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling: Qi Zhu,

Jiangwei Lao,

Deyi Ji,

Junwei Luo,

Kang Wu,

Yingying Zhang,

Lixiang Ru,

Jian Wang,

Jingdong Chen,

Ming Yang,

Dong Liu,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Qi and Lao, Jiangwei and Ji, Deyi and Luo, Junwei and Wu, Kang and Zhang, Yingying and Ru, Lixiang and Wang, Jian and Chen, Jingdong and Yang, Ming and Liu, Dong and Zhao, Feng},
    title     = {{SkySense-O}: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14733--14744}
}
AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation: Jingyi Xie,

Jintao Yang,

Zhunchen Luo,

Yunbo Cao,

Qiang Gao,

Mengyuan Zhang,

Wenpeng Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Jingyi and Yang, Jintao and Luo, Zhunchen and Cao, Yunbo and Gao, Qiang and Zhang, Mengyuan and Hu, Wenpeng},
    title     = {{AdaDARE-gamma}: Balancing Stability and Plasticity in Multi-modal {LLMs} through Efficient Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19758--19768}
}
LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis: Hanlin Wang,

Hao Ouyang,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Qifeng Chen,

Yujun Shen,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Hanlin and Ouyang, Hao and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Chen, Qifeng and Shen, Yujun and Wang, Limin},
    title     = {{LeviTor}: {3D} Trajectory Oriented Image-to-Video Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12490--12500}
}
SapiensID: Foundation for Human Recognition: Minchul Kim,

Dingqiang Ye,

Yiyang Su,

Feng Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Minchul and Ye, Dingqiang and Su, Yiyang and Liu, Feng and Liu, Xiaoming},
    title     = {{SapiensID}: Foundation for Human Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13937--13947}
}
Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think: Jie Tian,

Xiaoye Qu,

Zhenyi Lu,

Wei Wei,

Sichen Liu,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
    author    = {Tian, Jie and Qu, Xiaoye and Lu, Zhenyi and Wei, Wei and Liu, Sichen and Cheng, Yu},
    title     = {Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12512--12521}
}
FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling: Hang Ye,

Xiaoxuan Ma,

Hai Ci,

Wentao Zhu,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
    author    = {Ye, Hang and Ma, Xiaoxuan and Ci, Hai and Zhu, Wentao and Wang, Yizhou},
    title     = {{FreeCloth}: Free-form Generation Enhances Challenging Clothed Human Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15987--15997}
}
InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception: Haijie Li,

Yanmin Wu,

Jiarui Meng,

Qiankun Gao,

Zhiyao Zhang,

Ronggang Wang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Haijie and Wu, Yanmin and Meng, Jiarui and Gao, Qiankun and Zhang, Zhiyao and Wang, Ronggang and Zhang, Jian},
    title     = {{InstanceGaussian}: Appearance-Semantic Joint {Gaussian} Representation for {3D} Instance-Level Perception},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14078--14088}
}
CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation: Zhenhui Ding,

Guilian Chen,

Qin Zhang,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_CVPR,
    author    = {Ding, Zhenhui and Chen, Guilian and Zhang, Qin and Wu, Huisi and Qin, Jing},
    title     = {{CSC-PA}: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15632--15641}
}
VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving: Haiming Zhang,

Wending Zhou,

Yiyao Zhu,

Xu Yan,

Jiantao Gao,

Dongfeng Bai,

Yingjie Cai,

Bingbing Liu,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Haiming and Zhou, Wending and Zhu, Yiyao and Yan, Xu and Gao, Jiantao and Bai, Dongfeng and Cai, Yingjie and Liu, Bingbing and Cui, Shuguang and Li, Zhen},
    title     = {{VisionPAD}: A Vision-Centric Pre-training Paradigm for Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17165--17175}
}
Detecting Adversarial Data Using Perturbation Forgery: Qian Wang,

Chen Li,

Yuchen Luo,

Hefei Ling,

Shijuan Huang,

Ruoxi Jia,

Ning Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Qian and Li, Chen and Luo, Yuchen and Ling, Hefei and Huang, Shijuan and Jia, Ruoxi and Yu, Ning},
    title     = {Detecting Adversarial Data Using Perturbation Forgery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13917--13926}
}
CoA: Towards Real Image Dehazing via Compression-and-Adaptation: Long Ma,

Yuxin Feng,

Yan Zhang,

Jinyuan Liu,

Weimin Wang,

Guang-Yong Chen,

Chengpei Xu,

Zhuo Su; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Long and Feng, Yuxin and Zhang, Yan and Liu, Jinyuan and Wang, Weimin and Chen, Guang-Yong and Xu, Chengpei and Su, Zhuo},
    title     = {{CoA}: Towards Real Image Dehazing via Compression-and-Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11197--11206}
}
TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model: Cheng Yang,

Yang Sui,

Jinqi Xiao,

Lingyi Huang,

Yu Gong,

Chendi Li,

Jinghua Yan,

Yu Bai,

Ponnuswamy Sadayappan,

Xia Hu,

Bo Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Cheng and Sui, Yang and Xiao, Jinqi and Huang, Lingyi and Gong, Yu and Li, Chendi and Yan, Jinghua and Bai, Yu and Sadayappan, Ponnuswamy and Hu, Xia and Yuan, Bo},
    title     = {{TopV}: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19803--19813}
}
Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues: Yuhui Liu,

Liangxun Ou,

Qiang Fu,

Hadi Amata,

Wolfgang Heidrich,

Yifan Peng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuhui and Ou, Liangxun and Fu, Qiang and Amata, Hadi and Heidrich, Wolfgang and Peng, Yifan},
    title     = {Learned Binocular-Encoding Optics for {RGBD} Imaging Using Joint Stereo and Focus Cues},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15833--15842}
}
MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices: Jianwen Jiang,

Gaojie Lin,

Zhengkun Rong,

Chao Liang,

Yongming Zhu,

Jiaqi Yang,

Tianyun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Jianwen and Lin, Gaojie and Rong, Zhengkun and Liang, Chao and Zhu, Yongming and Yang, Jiaqi and Zhong, Tianyun},
    title     = {{MobilePortrait}: Real-Time One-Shot Neural Head Avatars on Mobile Devices},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15920--15929}
}
Light3R-SfM: Towards Feed-forward Structure-from-Motion: Sven Elflein,

Qunjie Zhou,

Laura Leal-Taixé; [pdf] [supp]
[bibtex]
@InProceedings{Elflein_2025_CVPR,
    author    = {Elflein, Sven and Zhou, Qunjie and Leal-Taix{\'e}, Laura},
    title     = {{Light3R-SfM}: Towards Feed-forward Structure-from-Motion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16774--16784}
}
Robotic Visual Instruction: Yanbang Li,

Ziyang Gong,

Haoyang Li,

Xiaoqi Huang,

Haolan Kang,

Guangping Bai,

Xianzheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yanbang and Gong, Ziyang and Li, Haoyang and Huang, Xiaoqi and Kang, Haolan and Bai, Guangping and Ma, Xianzheng},
    title     = {Robotic Visual Instruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12155--12165}
}
MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors: Riku Murai,

Eric Dexheimer,

Andrew J. Davison; [pdf] [supp]
[bibtex]
@InProceedings{Murai_2025_CVPR,
    author    = {Murai, Riku and Dexheimer, Eric and Davison, Andrew J.},
    title     = {{MASt3R-SLAM}: Real-Time Dense {SLAM} with {3D} Reconstruction Priors},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16695--16705}
}
Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning: Mi Luo,

Zihui Xue,

Alex Dimakis,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
    author    = {Luo, Mi and Xue, Zihui and Dimakis, Alex and Grauman, Kristen},
    title     = {Viewpoint {Rosetta Stone}: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15802--15812}
}
Cross-modal Information Flow in Multimodal Large Language Models: Zhi Zhang,

Srishti Yadav,

Fengze Han,

Ekaterina Shutova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zhi and Yadav, Srishti and Han, Fengze and Shutova, Ekaterina},
    title     = {Cross-modal Information Flow in Multimodal Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19781--19791}
}
Keyframe-Guided Creative Video Inpainting: Yuwei Guo,

Ceyuan Yang,

Anyi Rao,

Chenlin Meng,

Omer Bar-Tal,

Shuangrui Ding,

Maneesh Agrawala,

Dahua Lin,

Bo Dai; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Yuwei and Yang, Ceyuan and Rao, Anyi and Meng, Chenlin and Bar-Tal, Omer and Ding, Shuangrui and Agrawala, Maneesh and Lin, Dahua and Dai, Bo},
    title     = {Keyframe-Guided Creative Video Inpainting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13009--13020}
}
EdgeTAM: On-Device Track Anything Model: Chong Zhou,

Chenchen Zhu,

Yunyang Xiong,

Saksham Suri,

Fanyi Xiao,

Lemeng Wu,

Raghuraman Krishnamoorthi,

Bo Dai,

Chen Change Loy,

Vikas Chandra,

Bilge Soran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Chong and Zhu, Chenchen and Xiong, Yunyang and Suri, Saksham and Xiao, Fanyi and Wu, Lemeng and Krishnamoorthi, Raghuraman and Dai, Bo and Loy, Chen Change and Chandra, Vikas and Soran, Bilge},
    title     = {{EdgeTAM}: On-Device Track Anything Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13832--13842}
}
EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues: Sagar Soni,

Akshay Dudhane,

Hiyam Debary,

Mustansar Fiaz,

Muhammad Akhtar Munir,

Muhammad Sohail Danish,

Paolo Fraccaro,

Campbell D Watson,

Levente J Klein,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soni_2025_CVPR,
    author    = {Soni, Sagar and Dudhane, Akshay and Debary, Hiyam and Fiaz, Mustansar and Munir, Muhammad Akhtar and Danish, Muhammad Sohail and Fraccaro, Paolo and Watson, Campbell D and Klein, Levente J and Khan, Fahad Shahbaz and Khan, Salman},
    title     = {{EarthDial}: Turning Multi-sensory {Earth} Observations to Interactive Dialogues},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14303--14313}
}
Video Summarization with Large Language Models: Min Jung Lee,

Dayoung Gong,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Min Jung and Gong, Dayoung and Cho, Minsu},
    title     = {Video Summarization with Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18981--18991}
}
Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback: Mohd Hozaifa Khan,

Ravi Kiran Sarvadevabhatla; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2025_CVPR,
    author    = {Khan, Mohd Hozaifa and Sarvadevabhatla, Ravi Kiran},
    title     = {{Sketchtopia}: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18176--18186}
}
Consistency-aware Self-Training for Iterative-based Stereo Matching: Jingyi Zhou,

Peng Ye,

Haoyu Zhang,

Jiakang Yuan,

Rao Qiang,

Liu YangChenXu,

Wu Cailin,

Feng Xu,

Tao Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Jingyi and Ye, Peng and Zhang, Haoyu and Yuan, Jiakang and Qiang, Rao and YangChenXu, Liu and Cailin, Wu and Xu, Feng and Chen, Tao},
    title     = {Consistency-aware Self-Training for Iterative-based Stereo Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16641--16650}
}
MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts: Peijie Wang,

Zhong-Zhi Li,

Fei Yin,

Dekang Ran,

Cheng-Lin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Peijie and Li, Zhong-Zhi and Yin, Fei and Ran, Dekang and Liu, Cheng-Lin},
    title     = {{MV-MATH}: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19541--19551}
}
Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model: Zhaochong An,

Guolei Sun,

Yun Liu,

Runjia Li,

Junlin Han,

Ender Konukoglu,

Serge Belongie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
    author    = {An, Zhaochong and Sun, Guolei and Liu, Yun and Li, Runjia and Han, Junlin and Konukoglu, Ender and Belongie, Serge},
    title     = {Generalized Few-shot {3D} Point Cloud Segmentation with Vision-Language Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16997--17007}
}; Back