ICCV 2025 Open Access Repository

Papers

Back
kh: Symmetry Understanding of 3D Shapes via Chirality Disentanglement: Weikang Wang,

Tobias Weißberg,

Nafie El Amrani,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weikang and Wei{\ss}berg, Tobias and El Amrani, Nafie and Bernard, Florian},
  title     = {kh: Symmetry Understanding of {3D} Shapes via Chirality Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28292--28302},
}
Efficient Adaptation of Pre-trained Vision Transformer underpinned by Approximately Orthogonal Fine-Tuning Strategy: Yiting Yang,

Hao Luo,

Yuan Sun,

Qingsen Yan,

Haokui Zhang,

Wei Dong,

Guoqing Wang,

Peng Wang,

Yang Yang,

Hengtao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yiting and Luo, Hao and Sun, Yuan and Yan, Qingsen and Zhang, Haokui and Dong, Wei and Wang, Guoqing and Wang, Peng and Yang, Yang and Shen, Hengtao},
  title     = {Efficient Adaptation of Pre-trained Vision Transformer underpinned by Approximately Orthogonal Fine-Tuning Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4878--4887},
}
MM-IFEngine: Towards Multimodal Instruction Following: Shengyuan Ding,

Shenxi Wu,

Xiangyu Zhao,

Yuhang Zang,

Haodong Duan,

Xiaoyi Dong,

Pan Zhang,

Yuhang Cao,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Shengyuan and Wu, Shenxi and Zhao, Xiangyu and Zang, Yuhang and Duan, Haodong and Dong, Xiaoyi and Zhang, Pan and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
  title     = {{MM-IFEngine}: Towards Multimodal Instruction Following},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1099--1109},
}
Who is a Better Talker: Subjective and Objective Quality Assessment for AI-Generated Talking Heads: Yingjie Zhou,

Jiezhang Cao,

Zicheng Zhang,

Farong Wen,

Yanwei Jiang,

Jun Jia,

Xiaohong Liu,

Xiongkuo Min,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yingjie and Cao, Jiezhang and Zhang, Zicheng and Wen, Farong and Jiang, Yanwei and Jia, Jun and Liu, Xiaohong and Min, Xiongkuo and Zhai, Guangtao},
  title     = {Who is a Better Talker: Subjective and Objective Quality Assessment for {AI}-Generated Talking Heads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12201--12211},
}
LayerAnimate: Layer-level Control for Animation: Yuxue Yang,

Lue Fan,

Zuzeng Lin,

Feng Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuxue and Fan, Lue and Lin, Zuzeng and Wang, Feng and Zhang, Zhaoxiang},
  title     = {{LayerAnimate}: Layer-level Control for Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10865--10874},
}
Towards a Unified Copernicus Foundation Model for Earth Vision: Yi Wang,

Zhitong Xiong,

Chenying Liu,

Adam J. Stewart,

Thomas Dujardin,

Nikolaos Ioannis Bountos,

Angelos Zavras,

Franziska Gerken,

Ioannis Papoutsis,

Laura Leal-Taixé,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yi and Xiong, Zhitong and Liu, Chenying and Stewart, Adam J. and Dujardin, Thomas and Bountos, Nikolaos Ioannis and Zavras, Angelos and Gerken, Franziska and Papoutsis, Ioannis and Leal-Taix{\'e}, Laura and Zhu, Xiao Xiang},
  title     = {Towards a Unified {Copernicus} Foundation Model for {Earth} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9888--9899},
}
ROADWork: A Dataset and Benchmark for Learning to Recognize, Observe, Analyze and Drive Through Work Zones: Anurag Ghosh,

Shen Zheng,

Robert Tamburo,

Khiem Vuong,

Juan Alvarez-Padilla,

Hailiang Zhu,

Michael Cardei,

Nicholas Dunn,

Christoph Mertz,

Srinivasa G. Narasimhan; [pdf] [supp]
[bibtex]
@InProceedings{Ghosh_2025_ICCV,
  author    = {Ghosh, Anurag and Zheng, Shen and Tamburo, Robert and Vuong, Khiem and Alvarez-Padilla, Juan and Zhu, Hailiang and Cardei, Michael and Dunn, Nicholas and Mertz, Christoph and Narasimhan, Srinivasa G.},
  title     = {{ROADWork}: A Dataset and Benchmark for Learning to Recognize, Observe, Analyze and Drive Through Work Zones},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6132--6142},
}
Gradient Decomposition and Alignment for Incremental Object Detection: Wenlong Luo,

Shizhou Zhang,

De Cheng,

Yinghui Xing,

Guoqiang Liang,

Peng Wang,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Wenlong and Zhang, Shizhou and Cheng, De and Xing, Yinghui and Liang, Guoqiang and Wang, Peng and Zhang, Yanning},
  title     = {Gradient Decomposition and Alignment for Incremental Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4486--4495},
}
One Polyp Identifies All: One-Shot Polyp Segmentation with SAM via Cascaded Priors and Iterative Prompt Evolution: Xinyu Mao,

Xiaohan Xing,

Fei Meng,

Jianbang Liu,

Fan Bai,

Qiang Nie,

Max Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Xinyu and Xing, Xiaohan and Meng, Fei and Liu, Jianbang and Bai, Fan and Nie, Qiang and Meng, Max},
  title     = {One Polyp Identifies All: One-Shot Polyp Segmentation with {SAM} via Cascaded Priors and Iterative Prompt Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24182--24191},
}
Gradient Extrapolation for Debiased Representation Learning: Ihab Asaad,

Maha Shadaydeh,

Joachim Denzler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asaad_2025_ICCV,
  author    = {Asaad, Ihab and Shadaydeh, Maha and Denzler, Joachim},
  title     = {Gradient Extrapolation for Debiased Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3819--3829},
}
From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning: Yexin Huang,

Yongbin Lin,

Lishengsa Yue,

Zhihong Yao,

Jie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yexin and Lin, Yongbin and Yue, Lishengsa and Yao, Zhihong and Wang, Jie},
  title     = {From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26146--26155},
}
Less-to-More Generalization: Unlocking More Controllability by In-Context Generation: Shaojin Wu,

Mengqi Huang,

Wenxu Wu,

Yufeng Cheng,

Fei Ding,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaojin and Huang, Mengqi and Wu, Wenxu and Cheng, Yufeng and Ding, Fei and He, Qian},
  title     = {Less-to-More Generalization: Unlocking More Controllability by In-Context Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18682--18692},
}
Improving Large Vision and Language Models by Learning from a Panel of Peers: Jefferson Hernandez,

Jing Shi,

Simon Jenni,

Vicente Ordonez,

Kushal Kafle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hernandez_2025_ICCV,
  author    = {Hernandez, Jefferson and Shi, Jing and Jenni, Simon and Ordonez, Vicente and Kafle, Kushal},
  title     = {Improving Large Vision and Language Models by Learning from a Panel of Peers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1402--1412},
}
Federated Representation Angle Learning: Liping Yi,

Han Yu,

Gang Wang,

Xiaoguang Liu,

Xiaoxiao Li; [pdf]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Liping and Yu, Han and Wang, Gang and Liu, Xiaoguang and Li, Xiaoxiao},
  title     = {Federated Representation Angle Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1314--1324},
}
Why LVLMs Are More Prone to Hallucinations in Longer Responses: The Role of Context: Ge Zheng,

Jiaye Qian,

Jiajin Tang,

Sibei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Ge and Qian, Jiaye and Tang, Jiajin and Yang, Sibei},
  title     = {Why {LVLMs} Are More Prone to Hallucinations in Longer Responses: The Role of Context},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4101--4113},
}
Training-Free Personalization via Retrieval and Reasoning on Fingerprints: Deepayan Das,

Davide Talon,

Yiming Wang,

Massimiliano Mancini,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2025_ICCV,
  author    = {Das, Deepayan and Talon, Davide and Wang, Yiming and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Training-Free Personalization via Retrieval and Reasoning on Fingerprints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9683--9692},
}
How Far are AI-generated Videos from Simulating the 3D Visual World: A Learned 3D Evaluation Approach: Chirui Chang,

Jiahui Liu,

Zhengzhe Liu,

Xiaoyang Lyu,

Yi-Hua Huang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Chirui and Liu, Jiahui and Liu, Zhengzhe and Lyu, Xiaoyang and Huang, Yi-Hua and Tao, Xin and Wan, Pengfei and Zhang, Di and Qi, Xiaojuan},
  title     = {How Far are {AI}-generated Videos from Simulating the {3D} Visual World: A Learned {3D} Evaluation Approach},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10307--10317},
}
Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes: Zhangjun Zhou,

Yiping Li,

Chunlin Zhong,

Jianuo Huang,

Jialun Pei,

Hua Li,

He Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhangjun and Li, Yiping and Zhong, Chunlin and Huang, Jianuo and Pei, Jialun and Li, Hua and Tang, He},
  title     = {Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22372--22382},
}
OccluGaussian: Occlusion-Aware Gaussian Splatting for Large Scene Reconstruction and Rendering: Shiyong Liu,

Xiao Tang,

Zhihao Li,

Yingfan He,

Chongjie Ye,

Jianzhuang Liu,

Binxiao Huang,

Shunbo Zhou,

Xiaofei Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shiyong and Tang, Xiao and Li, Zhihao and He, Yingfan and Ye, Chongjie and Liu, Jianzhuang and Huang, Binxiao and Zhou, Shunbo and Wu, Xiaofei},
  title     = {{OccluGaussian}: Occlusion-Aware {Gaussian} Splatting for Large Scene Reconstruction and Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26643--26652},
}
VisionMath: Vision-Form Mathematical Problem-Solving: Zongyang Ma,

Yuxin Chen,

Ziqi Zhang,

Zhongang Qi,

Chunfeng Yuan,

Shaojie Zhu,

Chengxiang Zhuo,

Bing Li,

Ye Liu,

Zang Li,

Ying Shan,

Weiming Hu; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Zongyang and Chen, Yuxin and Zhang, Ziqi and Qi, Zhongang and Yuan, Chunfeng and Zhu, Shaojie and Zhuo, Chengxiang and Li, Bing and Liu, Ye and Li, Zang and Shan, Ying and Hu, Weiming},
  title     = {{VisionMath}: Vision-Form Mathematical Problem-Solving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1162--1172},
}
Unsupervised RGB-D Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency: Yejun Shou,

Haocheng Wang,

Lingfeng Shen,

Qian Zheng,

Gang Pan,

Yanlong Cao; [pdf] [supp]
[bibtex]
@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yejun and Wang, Haocheng and Shen, Lingfeng and Zheng, Qian and Pan, Gang and Cao, Yanlong},
  title     = {Unsupervised {RGB-D} Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24868--24877},
}
CWNet: Causal Wavelet Network for Low-Light Image Enhancement: Tongshun Zhang,

Pingping Liu,

Yubing Lu,

Mengen Cai,

Zijian Zhang,

Zhe Zhang,

Qiuzhan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tongshun and Liu, Pingping and Lu, Yubing and Cai, Mengen and Zhang, Zijian and Zhang, Zhe and Zhou, Qiuzhan},
  title     = {{CWNet}: Causal Wavelet Network for Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8789--8799},
}
Demeter: A Parametric Model of Crop Plant Morphology from the Real World: Tianhang Cheng,

Albert J. Zhai,

Evan Z. Chen,

Rui Zhou,

Yawen Deng,

Zitong Li,

Kejie Zhao,

Janice Shiu,

Qianyu Zhao,

Yide Xu,

Xinlei Wang,

Yuan Shen,

Sheng Wang,

Lisa Ainsworth,

Kaiyu Guan,

Shenlong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Tianhang and Zhai, Albert J. and Chen, Evan Z. and Zhou, Rui and Deng, Yawen and Li, Zitong and Zhao, Kejie and Shiu, Janice and Zhao, Qianyu and Xu, Yide and Wang, Xinlei and Shen, Yuan and Wang, Sheng and Ainsworth, Lisa and Guan, Kaiyu and Wang, Shenlong},
  title     = {Demeter: A Parametric Model of Crop Plant Morphology from the Real World},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28740--28751},
}
VideoLLaMB: Long Streaming Video Understanding with Recurrent Memory Bridges: Yuxuan Wang,

Yiqi Song,

Cihang Xie,

Yang Liu,

Zilong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Song, Yiqi and Xie, Cihang and Liu, Yang and Zheng, Zilong},
  title     = {{VideoLLaMB}: Long Streaming Video Understanding with Recurrent Memory Bridges},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24170--24181},
}
Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models: Wei Xu,

Kangjie Chen,

Jiawei Qiu,

Yuyang Zhang,

Run Wang,

Jin Mao,

Tianwei Zhang,

Lina Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wei and Chen, Kangjie and Qiu, Jiawei and Zhang, Yuyang and Wang, Run and Mao, Jin and Zhang, Tianwei and Wang, Lina},
  title     = {Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18575--18584},
}
CoA-VLA: Improving Vision-Language-Action Models via Visual-Text Chain-of-Affordance: Jinming Li,

Yichen Zhu,

Zhibin Tang,

Junjie Wen,

Minjie Zhu,

Xiaoyu Liu,

Chengmeng Li,

Ran Cheng,

Yaxin Peng,

Yan Peng,

Feifei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jinming and Zhu, Yichen and Tang, Zhibin and Wen, Junjie and Zhu, Minjie and Liu, Xiaoyu and Li, Chengmeng and Cheng, Ran and Peng, Yaxin and Peng, Yan and Feng, Feifei},
  title     = {{CoA-VLA}: Improving Vision-Language-Action Models via Visual-Text Chain-of-Affordance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9759--9769},
}
HiERO: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos: Simone Alberto Peirone,

Francesca Pistilli,

Giuseppe Averta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peirone_2025_ICCV,
  author    = {Peirone, Simone Alberto and Pistilli, Francesca and Averta, Giuseppe},
  title     = {{HiERO}: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19862--19871},
}
FVGen: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation: Wenbin Teng,

Gonglin Chen,

Haiwei Chen,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teng_2025_ICCV,
  author    = {Teng, Wenbin and Chen, Gonglin and Chen, Haiwei and Zhao, Yajie},
  title     = {{FVGen}: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26095--26105},
}
ZFusion: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior: Alireza Esmaeilzehi,

Hossein Zaredar,

Yapeng Tian,

Laleh Seyyed-Kalantari; [pdf] [supp]
[bibtex]
@InProceedings{Esmaeilzehi_2025_ICCV,
  author    = {Esmaeilzehi, Alireza and Zaredar, Hossein and Tian, Yapeng and Seyyed-Kalantari, Laleh},
  title     = {{ZFusion}: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12338--12348},
}
Doodle Your Keypoints: Sketch-Based Few-Shot Keypoint Detection: Subhajit Maity,

Ayan Kumar Bhunia,

Subhadeep Koley,

Pinaki Nath Chowdhury,

Aneeshan Sain,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maity_2025_ICCV,
  author    = {Maity, Subhajit and Bhunia, Ayan Kumar and Koley, Subhadeep and Chowdhury, Pinaki Nath and Sain, Aneeshan and Song, Yi-Zhe},
  title     = {Doodle Your Keypoints: Sketch-Based Few-Shot Keypoint Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {284--296},
}
Open-Vocabulary Octree-Graph for 3D Scene Understanding: Zhigang Wang,

Yifei Su,

Chenhui Li,

Dong Wang,

Yan Huang,

Xuelong Li,

Bin Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhigang and Su, Yifei and Li, Chenhui and Wang, Dong and Huang, Yan and Li, Xuelong and Zhao, Bin},
  title     = {Open-Vocabulary Octree-Graph for {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7037--7047},
}
FlexGen: Flexible Multi-View Generation from Text and Image Inputs: Xinli Xu,

Wenhang Ge,

Jiantao Lin,

Jiawei Feng,

Lie Xu,

Hanfeng Zhao,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xinli and Ge, Wenhang and Lin, Jiantao and Feng, Jiawei and Xu, Lie and Zhao, Hanfeng and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{FlexGen}: Flexible Multi-View Generation from Text and Image Inputs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18714--18724},
}
SummDiff: Generative Modeling of Video Summarization with Diffusion: Kwanseok Kim,

Jaehoon Hahm,

Sumin Kim,

Jinhwan Sul,

Byunghak Kim,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Kwanseok and Hahm, Jaehoon and Kim, Sumin and Sul, Jinhwan and Kim, Byunghak and Lee, Joonseok},
  title     = {{SummDiff}: Generative Modeling of Video Summarization with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15096--15106},
}
FlowDPS : Flow-Driven Posterior Sampling for Inverse Problems: Jeongsol Kim,

Bryan Sangwoo Kim,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongsol and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{FlowDPS} : Flow-Driven Posterior Sampling for Inverse Problems},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12328--12337},
}
Head2Body: Body Pose Generation from Multi-sensory Head-mounted Inputs: Minh Tran,

Hongda Mao,

Qingshuang Chen,

Yelin Kim; [pdf] [supp]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Minh and Mao, Hongda and Chen, Qingshuang and Kim, Yelin},
  title     = {{Head2Body}: Body Pose Generation from Multi-sensory Head-mounted Inputs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6849--6858},
}
Closed-Loop Transfer for Weakly-supervised Affordance Grounding: Jiajin Tang,

Zhengxuan Wei,

Ge Zheng,

Sibei Yang; [pdf]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiajin and Wei, Zhengxuan and Zheng, Ge and Yang, Sibei},
  title     = {Closed-Loop Transfer for Weakly-supervised Affordance Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9530--9539},
}
OminiControl: Minimal and Universal Control for Diffusion Transformer: Zhenxiong Tan,

Songhua Liu,

Xingyi Yang,

Qiaochu Xue,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhenxiong and Liu, Songhua and Yang, Xingyi and Xue, Qiaochu and Wang, Xinchao},
  title     = {{OminiControl}: Minimal and Universal Control for Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14940--14950},
}
Zeroth-Order Fine-Tuning of LLMs in Random Subspaces: Ziming Yu,

Pan Zhou,

Sike Wang,

Jia Li,

Mi Tian,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ziming and Zhou, Pan and Wang, Sike and Li, Jia and Tian, Mi and Huang, Hua},
  title     = {Zeroth-Order Fine-Tuning of {LLMs} in Random Subspaces},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4475--4485},
}
G2D: Boosting Multimodal Learning with Gradient-Guided Distillation: Mohammed Rakib,

Arunkumar Bagavathi; [pdf] [supp]
[bibtex]
@InProceedings{Rakib_2025_ICCV,
  author    = {Rakib, Mohammed and Bagavathi, Arunkumar},
  title     = {{G2D}: Boosting Multimodal Learning with Gradient-Guided Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4059--4068},
}
AIComposer: Any Style and Content Image Composition via Feature Integration: Haowen Li,

Zhenfeng Fan,

Zhang Wen,

Zhengzhou Zhu,

Yunjin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Haowen and Fan, Zhenfeng and Wen, Zhang and Zhu, Zhengzhou and Li, Yunjin},
  title     = {{AIComposer}: Any Style and Content Image Composition via Feature Integration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16840--16850},
}
PAN-Crafter: Learning Modality-Consistent Alignment for PAN-Sharpening: Jeonghyeok Do,

Sungpyo Kim,

Geunhyuk Youk,

Jaehyup Lee,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Do_2025_ICCV,
  author    = {Do, Jeonghyeok and Kim, Sungpyo and Youk, Geunhyuk and Lee, Jaehyup and Kim, Munchurl},
  title     = {{PAN-Crafter}: Learning Modality-Consistent Alignment for {PAN}-Sharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4242--4252},
}
M2SFormer: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization: Ju-Hyeon Nam,

Dong-Hyun Moon,

Sang-Chul Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_ICCV,
  author    = {Nam, Ju-Hyeon and Moon, Dong-Hyun and Lee, Sang-Chul},
  title     = {{M2SFormer}: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15927--15938},
}
Pinco: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting: Guangben Lu,

Yuzhen Du,

Yizhe Tang,

Zhimin Sun,

Ran Yi,

Yifan Qi,

Tianyi Wang,

Lizhuang Ma,

Fangyuan Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guangben and Du, Yuzhen and Tang, Yizhe and Sun, Zhimin and Yi, Ran and Qi, Yifan and Wang, Tianyi and Ma, Lizhuang and Zou, Fangyuan},
  title     = {Pinco: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15266--15276},
}
ReconDreamer++: Harmonizing Generative and Reconstructive Models for Driving Scene Representation: Guosheng Zhao,

Xiaofeng Wang,

Chaojun Ni,

Zheng Zhu,

Wenkang Qin,

Guan Huang,

Xingang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Guosheng and Wang, Xiaofeng and Ni, Chaojun and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Wang, Xingang},
  title     = {{ReconDreamer++}: Harmonizing Generative and Reconstructive Models for Driving Scene Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26718--26728},
}
SyncDiff: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis: Wenkun He,

Yun Liu,

Ruitao Liu,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Wenkun and Liu, Yun and Liu, Ruitao and Yi, Li},
  title     = {{SyncDiff}: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11731--11743},
}
Rethinking Few Shot CLIP Benchmarks: A Critical Analysis in the Inductive Setting: Alexey Kravets,

Da Chen,

Vinay P. Namboodiri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kravets_2025_ICCV,
  author    = {Kravets, Alexey and Chen, Da and Namboodiri, Vinay P.},
  title     = {Rethinking Few Shot {CLIP} Benchmarks: A Critical Analysis in the Inductive Setting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1902--1911},
}
Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching: Yuhan Liu,

Jingwen Fu,

Yang Wu,

Kangyi Wu,

Pengna Li,

Jiayi Wu,

Sanping Zhou,

Jingmin Xin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuhan and Fu, Jingwen and Wu, Yang and Wu, Kangyi and Li, Pengna and Wu, Jiayi and Zhou, Sanping and Xin, Jingmin},
  title     = {Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20313--20323},
}
CoStoDet-DDPM: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition: Kaixiang Yang,

Xin Li,

Qiang Li,

Zhiwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Kaixiang and Li, Xin and Li, Qiang and Wang, Zhiwei},
  title     = {{CoStoDet-DDPM}: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23741--23751},
}
GSOT3D: Towards Generic 3D Single Object Tracking in the Wild: Yifan Jiao,

Yunhao Li,

Junhua Ding,

Qing Yang,

Song Fu,

Heng Fan,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2025_ICCV,
  author    = {Jiao, Yifan and Li, Yunhao and Ding, Junhua and Yang, Qing and Fu, Song and Fan, Heng and Zhang, Libo},
  title     = {{GSOT3D}: Towards Generic {3D} Single Object Tracking in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5469--5478},
}
UnZipLoRA: Separating Content and Style from a Single Image: Chang Liu,

Viraj Shah,

Aiyu Cui,

Svetlana Lazebnik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chang and Shah, Viraj and Cui, Aiyu and Lazebnik, Svetlana},
  title     = {{UnZipLoRA}: Separating Content and Style from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16776--16785},
}
What You Have is What You Track: Adaptive and Robust Multimodal Tracking: Yuedong Tan,

Jiawei Shao,

Eduard Zamfir,

Ruanjun Li,

Zhaochong An,

Chao Ma,

Danda Paudel,

Luc Van Gool,

Radu Timofte,

Zongwei Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Yuedong and Shao, Jiawei and Zamfir, Eduard and Li, Ruanjun and An, Zhaochong and Ma, Chao and Paudel, Danda and Van Gool, Luc and Timofte, Radu and Wu, Zongwei},
  title     = {What You Have is What You Track: Adaptive and Robust Multimodal Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3455--3465},
}
RareCLIP: Rarity-aware Online Zero-shot Industrial Anomaly Detection: Jianfang He,

Min Cao,

Silong Peng,

Qiong Xie; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jianfang and Cao, Min and Peng, Silong and Xie, Qiong},
  title     = {{RareCLIP}: Rarity-aware Online Zero-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24478--24487},
}
AdaDCP: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization: Qi Bi,

Yixian Shen,

Jingjun Yi,

Gui-Song Xia; [pdf]
[bibtex]
@InProceedings{Bi_2025_ICCV,
  author    = {Bi, Qi and Shen, Yixian and Yi, Jingjun and Xia, Gui-Song},
  title     = {{AdaDCP}: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12997--13008},
}
HERMES: A Unified Self-Driving World Model for Simultaneous 3D Scene Understanding and Generation: Xin Zhou,

Dingkang Liang,

Sifan Tu,

Xiwu Chen,

Yikang Ding,

Dingyuan Zhang,

Feiyang Tan,

Hengshuang Zhao,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Xin and Liang, Dingkang and Tu, Sifan and Chen, Xiwu and Ding, Yikang and Zhang, Dingyuan and Tan, Feiyang and Zhao, Hengshuang and Bai, Xiang},
  title     = {{HERMES}: A Unified Self-Driving World Model for Simultaneous {3D} Scene Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27817--27827},
}
ArgMatch: Adaptive Refinement Gathering for Efficient Dense Matching: Yuxin Deng,

Kaining Zhang,

Linfeng Tang,

Jiaqi Yang,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Yuxin and Zhang, Kaining and Tang, Linfeng and Yang, Jiaqi and Ma, Jiayi},
  title     = {{ArgMatch}: Adaptive Refinement Gathering for Efficient Dense Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27369--27379}
}
Enhancing Image Restoration Transformer via Adaptive Translation Equivariance: JiaKui Hu,

Zhengjian Yao,

Lujia Jin,

Hangzhou He,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, JiaKui and Yao, Zhengjian and Jin, Lujia and He, Hangzhou and Lu, Yanye},
  title     = {Enhancing Image Restoration Transformer via Adaptive Translation Equivariance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16047--16057}
}
Free4D: Tuning-free 4D Scene Generation with Spatial-Temporal Consistency: Tianqi Liu,

Zihao Huang,

Zhaoxi Chen,

Guangcong Wang,

Shoukang Hu,

Liao Shen,

Huiqiang Sun,

Zhiguo Cao,

Wei Li,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Tianqi and Huang, Zihao and Chen, Zhaoxi and Wang, Guangcong and Hu, Shoukang and Shen, Liao and Sun, Huiqiang and Cao, Zhiguo and Li, Wei and Liu, Ziwei},
  title     = {{Free4D}: Tuning-free {4D} Scene Generation with Spatial-Temporal Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25571--25582}
}
Generative Zoo: Tomasz Niewiadomski,

Anastasios Yiannakidis,

Hanz Cuevas-Velasquez,

Soubhik Sanyal,

Michael J. Black,

Silvia Zuffi,

Peter Kulits; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niewiadomski_2025_ICCV,
  author    = {Niewiadomski, Tomasz and Yiannakidis, Anastasios and Cuevas-Velasquez, Hanz and Sanyal, Soubhik and Black, Michael J. and Zuffi, Silvia and Kulits, Peter},
  title     = {Generative Zoo},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8492--8502}
}
Any-SSR: How Recursive Least Squares Works in Continual Learning of Large Language Model: Kai Tong,

Kang Pan,

Xiao Zhang,

Erli Meng,

Run He,

Yawen Cui,

Nuoyan Guo,

Huiping Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Kai and Pan, Kang and Zhang, Xiao and Meng, Erli and He, Run and Cui, Yawen and Guo, Nuoyan and Zhuang, Huiping},
  title     = {{Any-SSR}: How Recursive Least Squares Works in Continual Learning of Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3047--3057}
}
Instruction-Oriented Preference Alignment for Enhancing Multi-Modal Comprehension Capability of MLLMs: Zitian Wang,

Yue Liao,

Kang Rong,

Fengyun Rao,

Yibo Yang,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zitian and Liao, Yue and Rong, Kang and Rao, Fengyun and Yang, Yibo and Liu, Si},
  title     = {Instruction-Oriented Preference Alignment for Enhancing Multi-Modal Comprehension Capability of {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2010--2021}
}
RapVerse: Coherent Vocals and Whole-Body Motion Generation from Text: Jiaben Chen,

Xin Yan,

Yihang Chen,

Siyuan Cen,

Zixin Wang,

Qinwei Ma,

Haoyu Zhen,

Kaizhi Qian,

Lie Lu,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiaben and Yan, Xin and Chen, Yihang and Cen, Siyuan and Wang, Zixin and Ma, Qinwei and Zhen, Haoyu and Qian, Kaizhi and Lu, Lie and Gan, Chuang},
  title     = {{RapVerse}: Coherent Vocals and Whole-Body Motion Generation from Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10097--10107}
}
MoFRR: Mixture of Diffusion Models for Face Retouching Restoration: Jiaxin Liu,

Qichao Ying,

Zhenxing Qian,

Sheng Li,

Runqi Zhang,

Jian Liu,

Xinpeng Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiaxin and Ying, Qichao and Qian, Zhenxing and Li, Sheng and Zhang, Runqi and Liu, Jian and Zhang, Xinpeng},
  title     = {{MoFRR}: Mixture of Diffusion Models for Face Retouching Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12842--12851}
}
SFUOD: Source-Free Unknown Object Detection: Keon-Hee Park,

Seun-An Choe,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Keon-Hee and Choe, Seun-An and Park, Gyeong-Moon},
  title     = {{SFUOD}: Source-Free Unknown Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3499--3508}
}
UniEgoMotion: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation: Chaitanya Patel,

Hiroki Nakamura,

Yuta Kyuragi,

Kazuki Kozuka,

Juan Carlos Niebles,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Chaitanya and Nakamura, Hiroki and Kyuragi, Yuta and Kozuka, Kazuki and Niebles, Juan Carlos and Adeli, Ehsan},
  title     = {{UniEgoMotion}: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10318--10329}
}
ToolVQA: A Dataset for Multi-step Reasoning VQA with External Tools: Shaofeng Yin,

Ting Lei,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Shaofeng and Lei, Ting and Liu, Yang},
  title     = {{ToolVQA}: A Dataset for Multi-step Reasoning {VQA} with External Tools},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4424--4433}
}
Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation: Pierre-André Brousseau,

Sébastien Roy; [pdf] [supp]
[bibtex]
@InProceedings{Brousseau_2025_ICCV,
  author    = {Brousseau, Pierre-Andr{\'e} and Roy, S{\'e}bastien},
  title     = {Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28925--28934}
}
ScenePainter: Semantically Consistent Perpetual 3D Scene Generation with Concept Relation Alignment: Chong Xia,

Shengjun Zhang,

Fangfu Liu,

Chang Liu,

Khodchaphun Hirunyaratsameewong,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Chong and Zhang, Shengjun and Liu, Fangfu and Liu, Chang and Hirunyaratsameewong, Khodchaphun and Duan, Yueqi},
  title     = {{ScenePainter}: Semantically Consistent Perpetual {3D} Scene Generation with Concept Relation Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28808--28817}
}
ESCNet: Edge-Semantic Collaborative Network for Camouflaged Object Detection: Sheng Ye,

Xin Chen,

Yan Zhang,

Xianming Lin,

Liujuan Cao; [pdf]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Sheng and Chen, Xin and Zhang, Yan and Lin, Xianming and Cao, Liujuan},
  title     = {{ESCNet}: Edge-Semantic Collaborative Network for Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20053--20063}
}
PixelStitch: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching: Hengzhe Jin,

Lang Nie,

Chunyu Lin,

Xiaomei Feng,

Yao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Hengzhe and Nie, Lang and Lin, Chunyu and Feng, Xiaomei and Zhao, Yao},
  title     = {{PixelStitch}: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28125--28134}
}
SANA-Sprint: One-Step Diffusion with Continuous-Time Consistency Distillation: Junsong Chen,

Shuchen Xue,

Yuyang Zhao,

Jincheng Yu,

Sayak Paul,

Junyu Chen,

Han Cai,

Song Han,

Enze Xie; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Junsong and Xue, Shuchen and Zhao, Yuyang and Yu, Jincheng and Paul, Sayak and Chen, Junyu and Cai, Han and Han, Song and Xie, Enze},
  title     = {{SANA-Sprint}: One-Step Diffusion with Continuous-Time Consistency Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16185--16195}
}
Information-Bottleneck Driven Binary Neural Network for Change Detection: Kaijie Yin,

Zhiyuan Zhang,

Shu Kong,

Tian Gao,

Cheng-Zhong Xu,

Hui Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Kaijie and Zhang, Zhiyuan and Kong, Shu and Gao, Tian and Xu, Cheng-Zhong and Kong, Hui},
  title     = {Information-Bottleneck Driven Binary Neural Network for Change Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7176--7186}
}
Erasing More Than Intended? How Concept Erasure Degrades the Generation of Non-Target Concepts: Ibtihel Amara,

Ahmed Imtiaz Humayun,

Ivana Kajic,

Zarana Parekh,

Natalie Harris,

Sarah Young,

Chirag Nagpal,

Najoung Kim,

Junfeng He,

Cristina Nader Vasconcelos,

Deepak Ramachandran,

Golnoosh Farnadi,

Katherine Heller,

Mohammad Havaei,

Negar Rostamzadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amara_2025_ICCV,
  author    = {Amara, Ibtihel and Humayun, Ahmed Imtiaz and Kajic, Ivana and Parekh, Zarana and Harris, Natalie and Young, Sarah and Nagpal, Chirag and Kim, Najoung and He, Junfeng and Vasconcelos, Cristina Nader and Ramachandran, Deepak and Farnadi, Golnoosh and Heller, Katherine and Havaei, Mohammad and Rostamzadeh, Negar},
  title     = {Erasing More Than Intended? {How} Concept Erasure Degrades the Generation of Non-Target Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16420--16430}
}
Global and Local Entailment Learning for Natural World Imagery: Srikumar Sastry,

Aayush Dhakal,

Eric Xing,

Subash Khanal,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sastry_2025_ICCV,
  author    = {Sastry, Srikumar and Dhakal, Aayush and Xing, Eric and Khanal, Subash and Jacobs, Nathan},
  title     = {Global and Local Entailment Learning for Natural World Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15770--15780}
}
Ross3D: Reconstructive Visual Instruction Tuning with 3D-Awareness: Haochen Wang,

Yucheng Zhao,

Tiancai Wang,

Haoqiang Fan,

Xiangyu Zhang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haochen and Zhao, Yucheng and Wang, Tiancai and Fan, Haoqiang and Zhang, Xiangyu and Zhang, Zhaoxiang},
  title     = {{Ross3D}: Reconstructive Visual Instruction Tuning with {3D}-Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9275--9286}
}
LVFace: Progressive Cluster Optimization for Large Vision Models in Face Recognition: Jinghan You,

Shanglin Li,

Yuanrui Sun,

Jiangchuan Wei,

Mingyu Guo,

Chao Feng,

Jiao Ran; [pdf] [arXiv]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Jinghan and Li, Shanglin and Sun, Yuanrui and Wei, Jiangchuan and Guo, Mingyu and Feng, Chao and Ran, Jiao},
  title     = {{LVFace}: Progressive Cluster Optimization for Large Vision Models in Face Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11840--11849}
}
Dataset Ownership Verification for Pre-trained Masked Models: Yuechen Xie,

Jie Song,

Yicheng Shan,

Xiaoyan Zhang,

Yuanyu Wan,

Shengxuming Zhang,

Jiarui Duan,

Mingli Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Yuechen and Song, Jie and Shan, Yicheng and Zhang, Xiaoyan and Wan, Yuanyu and Zhang, Shengxuming and Duan, Jiarui and Song, Mingli},
  title     = {Dataset Ownership Verification for Pre-trained Masked Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3132--3142}
}
VLR-Driver: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving: Fanjie Kong,

Yitong Li,

Weihuang Chen,

Chen Min,

Yizhe Li,

Zhiqiang Gao,

Haoyang Li,

Zhongyu Guo,

Hongbin Sun; [pdf]
[bibtex]
@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Fanjie and Li, Yitong and Chen, Weihuang and Min, Chen and Li, Yizhe and Gao, Zhiqiang and Li, Haoyang and Guo, Zhongyu and Sun, Hongbin},
  title     = {{VLR-Driver}: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26966--26976}
}
ResGS: Residual Densification of 3D Gaussian for Efficient Detail Recovery: Yanzhe Lyu,

Kai Cheng,

Xin Kang,

Xuejin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Yanzhe and Cheng, Kai and Kang, Xin and Chen, Xuejin},
  title     = {{ResGS}: Residual Densification of {3D} {Gaussian} for Efficient Detail Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28093--28102}
}
Language Driven Occupancy Prediction: Zhu Yu,

Bowen Pang,

Lizhe Liu,

Runmin Zhang,

Qiang Li,

Si-Yuan Cao,

Maochun Luo,

Mingxia Chen,

Sheng Yang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Zhu and Pang, Bowen and Liu, Lizhe and Zhang, Runmin and Li, Qiang and Cao, Si-Yuan and Luo, Maochun and Chen, Mingxia and Yang, Sheng and Shen, Hui-Liang},
  title     = {Language Driven Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7548--7558}
}
Prior2Former - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation: Sebastian Schmidt,

Julius Koerner,

Dominik Fuchsgruber,

Stefano Gasperini,

Federico Tombari,

Stephan Günnemann; [pdf] [supp]
[bibtex]
@InProceedings{Schmidt_2025_ICCV,
  author    = {Schmidt, Sebastian and Koerner, Julius and Fuchsgruber, Dominik and Gasperini, Stefano and Tombari, Federico and G{\"u}nnemann, Stephan},
  title     = {{Prior2Former} - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23646--23656}
}
E-NeMF: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes: Yan Liu,

Zehao Chen,

Haojie Yan,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yan and Chen, Zehao and Yan, Haojie and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{E-NeMF}: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10854--10864}
}
Event-based Tiny Object Detection: A Benchmark Dataset and Baseline: Nuo Chen,

Chao Xiao,

Yimian Dai,

Shiman He,

Miao Li,

Wei An; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Nuo and Xiao, Chao and Dai, Yimian and He, Shiman and Li, Miao and An, Wei},
  title     = {Event-based Tiny Object Detection: A Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7209--7218}
}
Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing: Yang Xiao,

Wang Lu,

Jie Ji,

Ruimeng Ye,

Gen Li,

Xiaolong Ma,

Bo Hui; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yang and Lu, Wang and Ji, Jie and Ye, Ruimeng and Li, Gen and Ma, Xiaolong and Hui, Bo},
  title     = {Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20445--20455}
}
Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis: Yanzuo Lu,

Yuxi Ren,

Xin Xia,

Shanchuan Lin,

Xing Wang,

Xuefeng Xiao,

Andy J. Ma,

Xiaohua Xie,

Jian-Huang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yanzuo and Ren, Yuxi and Xia, Xin and Lin, Shanchuan and Wang, Xing and Xiao, Xuefeng and Ma, Andy J. and Xie, Xiaohua and Lai, Jian-Huang},
  title     = {Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16818--16829}
}
Switch-a-View: View Selection Learned from Unlabeled In-the-wild Videos: Sagnik Majumder,

Tushar Nagarajan,

Ziad Al-Halah,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Majumder_2025_ICCV,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Grauman, Kristen},
  title     = {{Switch-a-View}: View Selection Learned from Unlabeled In-the-wild Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11969--11979}
}
E-SAM: Training-Free Segment Every Entity Model: Weiming Zhang,

Dingwen Xiao,

Lei Chen,

Lin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weiming and Xiao, Dingwen and Chen, Lei and Wang, Lin},
  title     = {{E-SAM}: Training-Free Segment Every Entity Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24688--24697}
}
ViewSRD: 3D Visual Grounding via Structured Multi-View Decomposition: Ronggang Huang,

Haoxin Yang,

Yan Cai,

Xuemiao Xu,

Huaidong Zhang,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ronggang and Yang, Haoxin and Cai, Yan and Xu, Xuemiao and Zhang, Huaidong and He, Shengfeng},
  title     = {{ViewSRD}: {3D} Visual Grounding via Structured Multi-View Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9726--9736}
}
UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization: Junjie He,

Yifeng Geng,

Liefeng Bo; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Junjie and Geng, Yifeng and Bo, Liefeng},
  title     = {{UniPortrait}: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14399--14408}
}
FDPT: Federated Discrete Prompt Tuning for Black-Box Visual-Language Models: Jiaqi Wu,

Simin Chen,

Jing Tang,

Yuzhe Yang,

Yiming Chen,

Lixu Wang,

Song Lin,

Zehua Wang,

Wei Chen,

Zijian Tian; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiaqi and Chen, Simin and Tang, Jing and Yang, Yuzhe and Chen, Yiming and Wang, Lixu and Lin, Song and Wang, Zehua and Chen, Wei and Tian, Zijian},
  title     = {{FDPT}: Federated Discrete Prompt Tuning for Black-Box Visual-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2461--2470}
}
Exploiting Diffusion Prior for Task-driven Image Restoration: Jaeha Kim,

Junghun Oh,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu},
  title     = {Exploiting Diffusion Prior for Task-driven Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10151--10161}
}
CE-FAM: Concept-Based Explanation via Fusion of Activation Maps: Michihiro Kuroki,

Toshihiko Yamasaki; [pdf]
[bibtex]
@InProceedings{Kuroki_2025_ICCV,
  author    = {Kuroki, Michihiro and Yamasaki, Toshihiko},
  title     = {{CE-FAM}: Concept-Based Explanation via Fusion of Activation Maps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1413--1422}
}
Dual-level Prototype Learning for Composite Degraded Image Restoration: Zhongze Wang,

Haitao Zhao,

Lujian Yao,

Jingchao Peng,

Kaijie Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhongze and Zhao, Haitao and Yao, Lujian and Peng, Jingchao and Zhao, Kaijie},
  title     = {Dual-level Prototype Learning for Composite Degraded Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14006--14016}
}
Forensic-MoE: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts: Mingqi Fang,

Ziguang Li,

Lingyun Yu,

Quanwei Yang,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Mingqi and Li, Ziguang and Yu, Lingyun and Yang, Quanwei and Xie, Hongtao and Zhang, Yongdong},
  title     = {{Forensic-MoE}: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17772--17782}
}
Robust Adverse Weather Removal via Spectral-based Spatial Grouping: Yuhwan Jeong,

Yunseo Yang,

Youngho Yoon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Yuhwan and Yang, Yunseo and Yoon, Youngho and Yoon, Kuk-Jin},
  title     = {Robust Adverse Weather Removal via Spectral-based Spatial Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11872--11883}
}
PLAN: Proactive Low-Rank Allocation for Continual Learning: Xiequn Wang,

Zhan Zhuang,

Yu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiequn and Zhuang, Zhan and Zhang, Yu},
  title     = {{PLAN}: Proactive Low-Rank Allocation for Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2909--2918}
}
EMoTive: Event-guided Trajectory Modeling for 3D Motion Estimation: Zengyu Wan,

Wei Zhai,

Yang Cao,

Zhengjun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Zengyu and Zhai, Wei and Cao, Yang and Zha, Zhengjun},
  title     = {{EMoTive}: Event-guided Trajectory Modeling for {3D} Motion Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9342--9351}
}
RobuSTereo: Robust Zero-Shot Stereo Matching under Adverse Weather: Yuran Wang,

Yingping Liang,

Yutao Hu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuran and Liang, Yingping and Hu, Yutao and Fu, Ying},
  title     = {{RobuSTereo}: Robust Zero-Shot Stereo Matching under Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25134--25144}
}
SemGes: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning: Lanmiao Liu,

Esam Ghaleb,

Asli Ozyurek,

Zerrin Yumak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lanmiao and Ghaleb, Esam and Ozyurek, Asli and Yumak, Zerrin},
  title     = {{SemGes}: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13963--13973}
}
From Sharp to Blur: Unsupervised Domain Adaptation for 2D Human Pose Estimation Under Extreme Motion Blur Using Event Cameras: Youngho Kim,

Hoonhee Cho,

Kuk-Jin Yoon; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Youngho and Cho, Hoonhee and Yoon, Kuk-Jin},
  title     = {From Sharp to Blur: Unsupervised Domain Adaptation for {2D} Human Pose Estimation Under Extreme Motion Blur Using Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9406--9417}
}
HyPiDecoder: Hybrid Pixel Decoder for Efficient Segmentation and Detection: Fengzhe Zhou,

Humphrey Shi; [pdf]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Fengzhe and Shi, Humphrey},
  title     = {{HyPiDecoder}: Hybrid Pixel Decoder for Efficient Segmentation and Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22100--22109}
}
DDB: Diffusion Driven Balancing to Address Spurious Correlations: Aryan Yazdan Parast,

Basim Azam,

Naveed Akhtar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parast_2025_ICCV,
  author    = {Parast, Aryan Yazdan and Azam, Basim and Akhtar, Naveed},
  title     = {{DDB}: Diffusion Driven Balancing to Address Spurious Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17526--17535}
}
Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation: Shuo Jin,

Siyue Yu,

Bingfeng Zhang,

Mingjie Sun,

Yi Dong,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Sun, Mingjie and Dong, Yi and Xiao, Jimin},
  title     = {Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20291--20300}
}
Motal: Unsupervised 3D Object Detection by Modality and Task-specific Knowledge Transfer: Hai Wu,

Hongwei Lin,

Xusheng Guo,

Xin Li,

Mingming Wang,

Cheng Wang,

Chenglu Wen; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Hai and Lin, Hongwei and Guo, Xusheng and Li, Xin and Wang, Mingming and Wang, Cheng and Wen, Chenglu},
  title     = {Motal: Unsupervised {3D} Object Detection by Modality and Task-specific Knowledge Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6284--6293}
}
MMAIF: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance: Zihan Cao,

Yu Zhong,

Ziqi Wang,

Liang-Jian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Zihan and Zhong, Yu and Wang, Ziqi and Deng, Liang-Jian},
  title     = {{MMAIF}: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11744--11754}
}
SparseLaneSTP: Leveraging Spatio-Temporal Priors with Sparse Transformers for 3D Lane Detection: Maximilian Pittner,

Joel Janai,

Mario Faigle,

Alexandru Paul Condurache; [pdf] [supp]
[bibtex]
@InProceedings{Pittner_2025_ICCV,
  author    = {Pittner, Maximilian and Janai, Joel and Faigle, Mario and Condurache, Alexandru Paul},
  title     = {{SparseLaneSTP}: Leveraging Spatio-Temporal Priors with Sparse Transformers for {3D} Lane Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29099--29109}
}
MonSTeR: a Unified Model for Motion, Scene, Text Retrieval: Luca Collorone,

Matteo Gioia,

Massimiliano Pappa,

Paolo Leoni,

Giovanni Ficarra,

Or Litany,

Indro Spinelli,

Fabio Galasso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Collorone_2025_ICCV,
  author    = {Collorone, Luca and Gioia, Matteo and Pappa, Massimiliano and Leoni, Paolo and Ficarra, Giovanni and Litany, Or and Spinelli, Indro and Galasso, Fabio},
  title     = {{MonSTeR}: a Unified Model for Motion, Scene, Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10940--10949}
}
Semantic Causality-Aware Vision-Based 3D Occupancy Prediction: Dubing Chen,

Huan Zheng,

Yucheng Zhou,

Xianfei Li,

Wenlong Liao,

Tao He,

Pai Peng,

Jianbing Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Dubing and Zheng, Huan and Zhou, Yucheng and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Semantic Causality-Aware Vision-Based {3D} Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24878--24888}
}
OmniSAM: Omnidirectional Segment Anything Model for UDA in Panoramic Semantic Segmentation: Ding Zhong,

Xu Zheng,

Chenfei Liao,

Yuanhuiyi Lyu,

Jialei Chen,

Shengyang Wu,

Linfeng Zhang,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Ding and Zheng, Xu and Liao, Chenfei and Lyu, Yuanhuiyi and Chen, Jialei and Wu, Shengyang and Zhang, Linfeng and Hu, Xuming},
  title     = {{OmniSAM}: Omnidirectional Segment Anything Model for {UDA} in Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23892--23901}
}
Event-Driven Storytelling with Multiple Lifelike Humans in a 3D Scene: Donggeun Lim,

Jinseok Bae,

Inwoo Hwang,

Seungmin Lee,

Hwanhee Lee,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Donggeun and Bae, Jinseok and Hwang, Inwoo and Lee, Seungmin and Lee, Hwanhee and Kim, Young Min},
  title     = {Event-Driven Storytelling with Multiple Lifelike Humans in a {3D} Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11654--11664}
}
Probabilistic Inertial Poser (ProbIP): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors: Min Kim,

Younho Jeon,

Sungho Jo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min and Jeon, Younho and Jo, Sungho},
  title     = {Probabilistic Inertial Poser ({ProbIP}): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25893--25902}
}
Bi-Level Optimization for Self-Supervised AI-Generated Face Detection: Mian Zou,

Nan Zhong,

Baosheng Yu,

Yibing Zhan,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Mian and Zhong, Nan and Yu, Baosheng and Zhan, Yibing and Ma, Kede},
  title     = {Bi-Level Optimization for Self-Supervised {AI}-Generated Face Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18959--18968},
}
RIOcc: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for 3D Semantic Occupancy Prediction: Baojie Fan,

Xiaotian Li,

Yuhan Zhou,

Yuyu Jiang,

Jiandong Tian,

Huijie Fan; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Baojie and Li, Xiaotian and Zhou, Yuhan and Jiang, Yuyu and Tian, Jiandong and Fan, Huijie},
  title     = {{RIOcc}: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25851--25861},
}
Golden Noise for Diffusion Models: A Learning Framework: Zikai Zhou,

Shitong Shao,

Lichen Bai,

Shufei Zhang,

Zhiqiang Xu,

Bo Han,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zikai and Shao, Shitong and Bai, Lichen and Zhang, Shufei and Xu, Zhiqiang and Han, Bo and Xie, Zeke},
  title     = {Golden Noise for Diffusion Models: A Learning Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17688--17697},
}
CharaConsist: Fine-Grained Consistent Character Generation: Mengyu Wang,

Henghui Ding,

Jianing Peng,

Yao Zhao,

Yunpeng Chen,

Yunchao Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengyu and Ding, Henghui and Peng, Jianing and Zhao, Yao and Chen, Yunpeng and Wei, Yunchao},
  title     = {{CharaConsist}: Fine-Grained Consistent Character Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16058--16067},
}
RetinexMCNet: A Memory Controller Dominated Network for Low-Light Video Enhancement Based on Retinex: Meiao Wang,

Xuejing Kang,

Yaxi Lu,

Jie Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Meiao and Kang, Xuejing and Lu, Yaxi and Xu, Jie},
  title     = {{RetinexMCNet}: A Memory Controller Dominated Network for Low-Light Video Enhancement Based on {Retinex}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9716--9725},
}
Learnable Fractional Reaction-Diffusion Dynamics for Under-Display ToF Imaging and Beyond: Xin Qiao,

Matteo Poggi,

Xing Wei,

Pengchao Deng,

Yanhui Zhou,

Stefano Mattoccia; [pdf] [supp]
[bibtex]
@InProceedings{Qiao_2025_ICCV,
  author    = {Qiao, Xin and Poggi, Matteo and Wei, Xing and Deng, Pengchao and Zhou, Yanhui and Mattoccia, Stefano},
  title     = {Learnable Fractional Reaction-Diffusion Dynamics for Under-Display {ToF} Imaging and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6080--6090},
}
Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model SAIC: Dataset Construction, Methodology, and Application: Ruiyun Yu,

Bingyang Guo,

Haoyuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruiyun and Guo, Bingyang and Li, Haoyuan},
  title     = {Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model {SAIC}: Dataset Construction, Methodology, and Application},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22563--22574},
}
Refer to Any Segmentation Mask Group With Vision-Language Prompts: Shengcao Cao,

Zijun Wei,

Jason Kuen,

Kangning Liu,

Lingzhi Zhang,

Jiuxiang Gu,

HyunJoon Jung,

Liang-Yan Gui,

Yu-Xiong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Shengcao and Wei, Zijun and Kuen, Jason and Liu, Kangning and Zhang, Lingzhi and Gu, Jiuxiang and Jung, HyunJoon and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Refer to Any Segmentation Mask Group With Vision-Language Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21853--21863},
}
Integrating Visual Interpretation and Linguistic Reasoning for Geometric Problem Solving: Zixian Guo,

Ming Liu,

Qilong Wang,

Zhilong Ji,

Jinfeng Bai,

Lei Zhang,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Zixian and Liu, Ming and Wang, Qilong and Ji, Zhilong and Bai, Jinfeng and Zhang, Lei and Zuo, Wangmeng},
  title     = {Integrating Visual Interpretation and Linguistic Reasoning for Geometric Problem Solving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3988--3998},
}
Dual-Rate Dynamic Teacher for Source-Free Domain Adaptive Object Detection: Qi He,

Xiao Wu,

Jun-Yan He,

Shuai Li; [pdf]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Qi and Wu, Xiao and He, Jun-Yan and Li, Shuai},
  title     = {Dual-Rate Dynamic Teacher for Source-Free Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2067--2076},
}
How Do Multimodal Large Language Models Handle Complex Multimodal Reasoning? Placing Them in An Extensible Escape Game: Ziyue Wang,

Yurui Dong,

Fuwen Luo,

Minyuan Ruan,

Zhili Cheng,

Chi Chen,

Peng Li,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyue and Dong, Yurui and Luo, Fuwen and Ruan, Minyuan and Cheng, Zhili and Chen, Chi and Li, Peng and Liu, Yang},
  title     = {How Do Multimodal Large Language Models Handle Complex Multimodal Reasoning? Placing Them in An Extensible Escape Game},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4807--4817},
}
UST-SSM: Unified Spatio-Temporal State Space Models for Point Cloud Video Modeling: Peiming Li,

Ziyi Wang,

Yulin Yuan,

Hong Liu,

Xiangming Meng,

Junsong Yuan,

Mengyuan Liu; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Peiming and Wang, Ziyi and Yuan, Yulin and Liu, Hong and Meng, Xiangming and Yuan, Junsong and Liu, Mengyuan},
  title     = {{UST-SSM}: Unified Spatio-Temporal State Space Models for Point Cloud Video Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6738--6747},
}
Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior: Renzhi He,

Haowen Zhou,

Yubei Chen,

Yi Xue; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Renzhi and Zhou, Haowen and Chen, Yubei and Xue, Yi},
  title     = {Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27771--27782},
}
PropVG: End-to-End Proposal-Driven Visual Grounding with Multi-Granularity Discrimination: Ming Dai,

Wenxuan Cheng,

Jiedong Zhuang,

Jiang-jiang Liu,

Hongshen Zhao,

Zhenhua Feng,

Wankou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ming and Cheng, Wenxuan and Zhuang, Jiedong and Liu, Jiang-jiang and Zhao, Hongshen and Feng, Zhenhua and Yang, Wankou},
  title     = {{PropVG}: End-to-End Proposal-Driven Visual Grounding with Multi-Granularity Discrimination},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7058--7068},
}
StyleSRN: Scene Text Image Super-Resolution with Text Style Embedding: Shengrong Yuan,

Runmin Wang,

Ke Hao,

Xuqi Ma,

Changxin Gao,

Li Liu,

Nong Sang; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Shengrong and Wang, Runmin and Hao, Ke and Ma, Xuqi and Gao, Changxin and Liu, Li and Sang, Nong},
  title     = {{StyleSRN}: Scene Text Image Super-Resolution with Text Style Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18693--18702},
}
SpatialSplat: Efficient Semantic 3D from Sparse Unposed Images: Yu Sheng,

Jiajun Deng,

Xinran Zhang,

Yu Zhang,

Bei Hua,

Yanyong Zhang,

Jianmin Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2025_ICCV,
  author    = {Sheng, Yu and Deng, Jiajun and Zhang, Xinran and Zhang, Yu and Hua, Bei and Zhang, Yanyong and Ji, Jianmin},
  title     = {{SpatialSplat}: Efficient Semantic {3D} from Sparse Unposed Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26404--26414},
}
Learning Visual Proxy for Compositional Zero-Shot Learning: Shiyu Zhang,

Cheng Yan,

Yang Liu,

Chenchen Jing,

Lei Zhou,

Wenjun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiyu and Yan, Cheng and Liu, Yang and Jing, Chenchen and Zhou, Lei and Wang, Wenjun},
  title     = {Learning Visual Proxy for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2793--2802},
}
Processing and acquisition traces in visual encoders: What does CLIP know about your camera?: Ryan Ramos,

Vladan Stojnić,

Giorgos Kordopatis-Zilos,

Yuta Nakashima,

Giorgos Tolias,

Noa Garcia; [pdf] [supp]
[bibtex]
@InProceedings{Ramos_2025_ICCV,
  author    = {Ramos, Ryan and Stojni{\'c}, Vladan and Kordopatis-Zilos, Giorgos and Nakashima, Yuta and Tolias, Giorgos and Garcia, Noa},
  title     = {Processing and acquisition traces in visual encoders: What does {CLIP} know about your camera?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17056--17066},
}
Holistic Tokenizer for Autoregressive Image Generation: Anlin Zheng,

Haochen Wang,

Yucheng Zhao,

Weipeng Deng,

Tiancai Wang,

Xiangyu Zhang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Anlin and Wang, Haochen and Zhao, Yucheng and Deng, Weipeng and Wang, Tiancai and Zhang, Xiangyu and Qi, Xiaojuan},
  title     = {Holistic Tokenizer for Autoregressive Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16916--16926},
}
ConsistentCity: Semantic Flow-guided Occupancy DiT for Temporally Consistent Driving Scene Synthesis: Benjin Zhu,

Xiaogang Wang,

Hongsheng Li; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Benjin and Wang, Xiaogang and Li, Hongsheng},
  title     = {{ConsistentCity}: Semantic Flow-guided Occupancy {DiT} for Temporally Consistent Driving Scene Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26382--26392},
}
G2PDiffusion: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion: Mengdi Liu,

Zhangyang Gao,

Hong Chang,

Stan Z. Li,

Shiguang Shan,

Xilin Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mengdi and Gao, Zhangyang and Chang, Hong and Li, Stan Z. and Shan, Shiguang and Chen, Xilin},
  title     = {{G2PDiffusion}: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20705--20714},
}
PathFinder: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology: Fatemeh Ghezloo,

Mehmet Saygin Seyfioglu,

Rustin Soraki,

Wisdom O. Ikezogwo,

Beibin Li,

Tejoram Vivekanandan,

Joann G. Elmore,

Ranjay Krishna,

Linda Shapiro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghezloo_2025_ICCV,
  author    = {Ghezloo, Fatemeh and Seyfioglu, Mehmet Saygin and Soraki, Rustin and Ikezogwo, Wisdom O. and Li, Beibin and Vivekanandan, Tejoram and Elmore, Joann G. and Krishna, Ranjay and Shapiro, Linda},
  title     = {{PathFinder}: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23431--23441},
}
VLABench: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks: Shiduo Zhang,

Zhe Xu,

Peiju Liu,

Xiaopeng Yu,

Yuan Li,

Qinghui Gao,

Zhaoye Fei,

Zhangyue Yin,

Zuxuan Wu,

Yu-Gang Jiang,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiduo and Xu, Zhe and Liu, Peiju and Yu, Xiaopeng and Li, Yuan and Gao, Qinghui and Fei, Zhaoye and Yin, Zhangyue and Wu, Zuxuan and Jiang, Yu-Gang and Qiu, Xipeng},
  title     = {{VLABench}: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11142--11152},
}
MolParser: End-to-end Visual Recognition of Molecule Structures in the Wild: Xi Fang,

Jiankun Wang,

Xiaochen Cai,

Shangqian Chen,

Shuwen Yang,

Haoyi Tao,

Nan Wang,

Lin Yao,

Linfeng Zhang,

Guolin Ke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xi and Wang, Jiankun and Cai, Xiaochen and Chen, Shangqian and Yang, Shuwen and Tao, Haoyi and Wang, Nan and Yao, Lin and Zhang, Linfeng and Ke, Guolin},
  title     = {{MolParser}: End-to-end Visual Recognition of Molecule Structures in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24528--24538},
}
UPRE: Zero-Shot Domain Adaptation for Object Detection via Unified Prompt and Representation Enhancement: Xiao Zhang,

Fei Wei,

Yong Wang,

Wenda Zhao,

Feiyi Li,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao and Wei, Fei and Wang, Yong and Zhao, Wenda and Li, Feiyi and Chu, Xiangxiang},
  title     = {{UPRE}: Zero-Shot Domain Adaptation for Object Detection via Unified Prompt and Representation Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {508--518},
}
Heavy Labels Out! Dataset Distillation with Label Space Lightening: Ruonan Yu,

Songhua Liu,

Zigeng Chen,

Jingwen Ye,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruonan and Liu, Songhua and Chen, Zigeng and Ye, Jingwen and Wang, Xinchao},
  title     = {Heavy Labels Out! Dataset Distillation with Label Space Lightening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5017--5026},
}
VideoMiner: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization: Xinye Cao,

Hongcan Guo,

Jiawen Qian,

Guoshun Nan,

Chao Wang,

Yuqi Pan,

Tianhao Hou,

Xiaojuan Wang,

Yutong Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xinye and Guo, Hongcan and Qian, Jiawen and Nan, Guoshun and Wang, Chao and Pan, Yuqi and Hou, Tianhao and Wang, Xiaojuan and Gao, Yutong},
  title     = {{VideoMiner}: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23773--23783},
}
Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation: Jiahua Dong,

Hui Yin,

Wenqi Liang,

Hanbin Zhao,

Henghui Ding,

Nicu Sebe,

Salman Khan,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jiahua and Yin, Hui and Liang, Wenqi and Zhao, Hanbin and Ding, Henghui and Sebe, Nicu and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11829--11839},
}
FROSS: Faster-Than-Real-Time Online 3D Semantic Scene Graph Generation from RGB-D Images: Hao-Yu Hou,

Chun-Yi Lee,

Motoharu Sonogashira,

Yasutomo Kawanishi; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Hao-Yu and Lee, Chun-Yi and Sonogashira, Motoharu and Kawanishi, Yasutomo},
  title     = {{FROSS}: Faster-Than-Real-Time Online {3D} Semantic Scene Graph Generation from {RGB-D} Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28818--28827},
}
HVPUNet: Hybrid-Voxel Point-cloud Upsampling Network: Juhyung Ha,

Vibhas Kumar Vats,

Soon-heung Jung,

Alimoor Reza,

David J. Crandall; [pdf]
[bibtex]
@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Juhyung and Vats, Vibhas Kumar and Jung, Soon-heung and Reza, Alimoor and Crandall, David J.},
  title     = {{HVPUNet}: Hybrid-Voxel Point-cloud Upsampling Network},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29153--29162},
}
MorphoGen: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework: Tianfang Zhu,

Hongyang Zhou,

Anan Li; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Tianfang and Zhou, Hongyang and Li, Anan},
  title     = {{MorphoGen}: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13021--13031},
}
REDUCIO! Generating 1K Video within 16 Seconds using Extremely Compressed Motion Latents: Rui Tian,

Qi Dai,

Jianmin Bao,

Kai Qiu,

Yifan Yang,

Chong Luo,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Rui and Dai, Qi and Bao, Jianmin and Qiu, Kai and Yang, Yifan and Luo, Chong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{REDUCIO}! Generating {1K} Video within 16 Seconds using Extremely Compressed Motion Latents},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19237--19247},
}
EgoAgent: A Joint Predictive Agent Model in Egocentric Worlds: Lu Chen,

Yizhou Wang,

Shixiang Tang,

Qianhong Ma,

Tong He,

Wanli Ouyang,

Xiaowei Zhou,

Hujun Bao,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Lu and Wang, Yizhou and Tang, Shixiang and Ma, Qianhong and He, Tong and Ouyang, Wanli and Zhou, Xiaowei and Bao, Hujun and Peng, Sida},
  title     = {{EgoAgent}: A Joint Predictive Agent Model in Egocentric Worlds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6970--6980},
}
Growing a Twig to Accelerate Large Vision-Language Models: Zhenwei Shao,

Mingyang Wang,

Zhou Yu,

Wenwen Pan,

Yan Yang,

Tao Wei,

Hongyuan Zhang,

Ning Mao,

Wei Chen,

Jun Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Zhenwei and Wang, Mingyang and Yu, Zhou and Pan, Wenwen and Yang, Yan and Wei, Tao and Zhang, Hongyuan and Mao, Ning and Chen, Wei and Yu, Jun},
  title     = {Growing a Twig to Accelerate Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20064--20074},
}
Is Less More? Exploring Token Condensation as Training-free Test-time Adaptation: Zixin Wang,

Dong Gong,

Sen Wang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zixin and Gong, Dong and Wang, Sen and Huang, Zi and Luo, Yadan},
  title     = {Is Less More? Exploring Token Condensation as Training-free Test-time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {144--154},
}
Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal: Wanchang Yu,

Qing Zhang,

Rongjia Zheng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wanchang and Zhang, Qing and Zheng, Rongjia and Zheng, Wei-Shi},
  title     = {Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11675--11684},
}
Controllable Latent Space Augmentation for Digital Pathology: Sofiène Boutaj,

Marin Scalbert,

Pierre Marza,

Florent Couzinie-Devy,

Maria Vakalopoulou,

Stergios Christodoulidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boutaj_2025_ICCV,
  author    = {Boutaj, Sofi{\`e}ne and Scalbert, Marin and Marza, Pierre and Couzinie-Devy, Florent and Vakalopoulou, Maria and Christodoulidis, Stergios},
  title     = {Controllable Latent Space Augmentation for Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22165--22174},
}
Global Motion Corresponder for 3D Point-Based Scene Interpolation under Large Motion: Junru Lin,

Chirag Vashist,

Mikaela Angelina Uy,

Colton Stearns,

Xuan Luo,

Leonidas Guibas,

Ke Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Junru and Vashist, Chirag and Uy, Mikaela Angelina and Stearns, Colton and Luo, Xuan and Guibas, Leonidas and Li, Ke},
  title     = {Global Motion Corresponder for {3D} Point-Based Scene Interpolation under Large Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7884--7893},
}
WINS: Winograd Structured Pruning for Fast Winograd Convolution: Cheonjun Park,

Hyun Jae Oh,

Mincheol Park,

Hyunchan Moon,

Minsik Kim,

Suhyun Kim,

Myung Kuk Yoon,

Won Woo Ro; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Cheonjun and Oh, Hyun Jae and Park, Mincheol and Moon, Hyunchan and Kim, Minsik and Kim, Suhyun and Yoon, Myung Kuk and Ro, Won Woo},
  title     = {{WINS}: {Winograd} Structured Pruning for Fast {Winograd} Convolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22477--22487},
}
Revelio: Interpreting and leveraging semantic information in diffusion models: Dahye Kim,

Xavier Thomas,

Deepti Ghadiyaram; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dahye and Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {{Revelio}: Interpreting and leveraging semantic information in diffusion models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4659--4669},
}
UniRes: Universal Image Restoration for Complex Degradations: Mo Zhou,

Keren Ye,

Mauricio Delbracio,

Peyman Milanfar,

Vishal M. Patel,

Hossein Talebi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Mo and Ye, Keren and Delbracio, Mauricio and Milanfar, Peyman and Patel, Vishal M. and Talebi, Hossein},
  title     = {{UniRes}: Universal Image Restoration for Complex Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13237--13247},
}
RayGaussX: Accelerating Gaussian-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis: Hugo Blanc,

Jean-Emmanuel Deschaud,

Alexis Paljic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Blanc_2025_ICCV,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {{RayGaussX}: Accelerating {Gaussian}-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27575--27584},
}
CoopTrack: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception: Jiaru Zhong,

Jiahao Wang,

Jiahui Xu,

Xiaofan Li,

Zaiqing Nie,

Haibao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Jiaru and Wang, Jiahao and Xu, Jiahui and Li, Xiaofan and Nie, Zaiqing and Yu, Haibao},
  title     = {{CoopTrack}: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26954--26965},
}
ContraGS: Codebook-Condensed and Trainable Gaussian Splatting for Fast, Memory-Efficient Reconstruction: Sankeerth Durvasula,

Sharanshangar Muhunthan,

Zain Moustafa,

Richard Chen,

Ruofan Liang,

Yushi Guan,

Nilesh Ahuja,

Nilesh Jain,

Selvakumar Panneer,

Nandita Vijaykumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Durvasula_2025_ICCV,
  author    = {Durvasula, Sankeerth and Muhunthan, Sharanshangar and Moustafa, Zain and Chen, Richard and Liang, Ruofan and Guan, Yushi and Ahuja, Nilesh and Jain, Nilesh and Panneer, Selvakumar and Vijaykumar, Nandita},
  title     = {{ContraGS}: Codebook-Condensed and Trainable {Gaussian} Splatting for Fast, Memory-Efficient Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28935--28945},
}
NATRA: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations: Rongqing Li,

Changsheng Li,

Ruilin Lv,

Yuhang Li,

Yang Gao,

Xiaolu Zhang,

Jun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Rongqing and Li, Changsheng and Lv, Ruilin and Li, Yuhang and Gao, Yang and Zhang, Xiaolu and Zhou, Jun},
  title     = {{NATRA}: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27872--27884},
}
Your Text Encoder Can Be An Object-Level Watermarking Controller: Naresh Kumar Devulapally,

Mingzhen Huang,

Vishal Asnani,

Shruti Agarwal,

Siwei Lyu,

Vishnu Suresh Lokhande; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Devulapally_2025_ICCV,
  author    = {Devulapally, Naresh Kumar and Huang, Mingzhen and Asnani, Vishal and Agarwal, Shruti and Lyu, Siwei and Lokhande, Vishnu Suresh},
  title     = {Your Text Encoder Can Be An Object-Level Watermarking Controller},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16576--16585},
}
Spatial-Temporal Aware Visuomotor Diffusion Policy Learning: Zhenyang Liu,

Yikai Wang,

Kuanning Wang,

Longfei Liang,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zhenyang and Wang, Yikai and Wang, Kuanning and Liang, Longfei and Xue, Xiangyang and Fu, Yanwei},
  title     = {Spatial-Temporal Aware Visuomotor Diffusion Policy Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7122--7131},
}
ModalTune: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology: Vishwesh Ramanathan,

Tony Xu,

Pushpak Pati,

Faruk Ahmed,

Maged Goubran,

Anne L. Martel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramanathan_2025_ICCV,
  author    = {Ramanathan, Vishwesh and Xu, Tony and Pati, Pushpak and Ahmed, Faruk and Goubran, Maged and Martel, Anne L.},
  title     = {{ModalTune}: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23912--23923},
}
Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation: Yuheng Shi,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yuheng and Dong, Minjing and Xu, Chang},
  title     = {Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23487--23497},
}
FixTalk: Taming Identity Leakage for High-Quality Talking Head Generation in Extreme Cases: Shuai Tan,

Bill Gong,

Bin Ji,

Ye Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Shuai and Gong, Bill and Ji, Bin and Pan, Ye},
  title     = {{FixTalk}: Taming Identity Leakage for High-Quality Talking Head Generation in Extreme Cases},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24--36},
}
MinCD-PnP: Learning 2D-3D Correspondences with Approximate Blind PnP: Pei An,

Jiaqi Yang,

Muyao Peng,

You Yang,

Qiong Liu,

Xiaolin Wu,

Liangliang Nan; [pdf] [supp]
[bibtex]
@InProceedings{An_2025_ICCV,
  author    = {An, Pei and Yang, Jiaqi and Peng, Muyao and Yang, You and Liu, Qiong and Wu, Xiaolin and Nan, Liangliang},
  title     = {{MinCD-PnP}: Learning {2D-3D} Correspondences with Approximate Blind {PnP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26519--26528},
}
Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking: Yunhao Li,

Yifan Jiao,

Dan Meng,

Heng Fan,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yunhao and Jiao, Yifan and Meng, Dan and Fan, Heng and Zhang, Libo},
  title     = {Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14390--14398},
}
Hallucinatory Image Tokens: A Training-free EAZY Approach to Detecting and Mitigating Object Hallucinations in LVLMs: Liwei Che,

Tony Qingze Liu,

Jing Jia,

Weiyi Qin,

Ruixiang Tang,

Vladimir Pavlovic; [pdf] [supp]
[bibtex]
@InProceedings{Che_2025_ICCV,
  author    = {Che, Liwei and Liu, Tony Qingze and Jia, Jing and Qin, Weiyi and Tang, Ruixiang and Pavlovic, Vladimir},
  title     = {Hallucinatory Image Tokens: A Training-free {EAZY} Approach to Detecting and Mitigating Object Hallucinations in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21635--21644},
}
CVPT: Cross Visual Prompt Tuning: Lingyun Huang,

Jianxu Mao,

Junfei Yi,

Ziming Tao,

Yaonan Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Lingyun and Mao, Jianxu and Yi, Junfei and Tao, Ziming and Wang, Yaonan},
  title     = {{CVPT}: Cross Visual Prompt Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {848--858},
}
On the Robustness Tradeoff in Fine-Tuning: Kunyang Li,

Jean-Charles Noirot Ferrand,

Ryan Sheatsley,

Blaine Hoak,

Yohan Beugin,

Eric Pauley,

Patrick McDaniel; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Kunyang and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Hoak, Blaine and Beugin, Yohan and Pauley, Eric and McDaniel, Patrick},
  title     = {On the Robustness Tradeoff in Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4898--4907}
}
Lay2Story: Extending Diffusion Transformers for Layout-Togglable Story Generation: Ao Ma,

Jiasong Feng,

Ke Cao,

Jing Wang,

Yun Wang,

Quanwei Zhang,

Zhanjie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Ao and Feng, Jiasong and Cao, Ke and Wang, Jing and Wang, Yun and Zhang, Quanwei and Zhang, Zhanjie},
  title     = {{Lay2Story}: Extending Diffusion Transformers for Layout-Togglable Story Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16102--16111}
}
Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening: Shijie Fang,

Hongping Gan; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shijie and Gan, Hongping},
  title     = {Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13651--13661}
}
DeepMesh: Auto-Regressive Artist-mesh Creation with Reinforcement Learning: Ruowen Zhao,

Junliang Ye,

Zhengyi Wang,

Guangce Liu,

Yiwen Chen,

Yikai Wang,

Jun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Ruowen and Ye, Junliang and Wang, Zhengyi and Liu, Guangce and Chen, Yiwen and Wang, Yikai and Zhu, Jun},
  title     = {{DeepMesh}: Auto-Regressive Artist-mesh Creation with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10612--10623}
}
VisRL: Intention-Driven Visual Perception via Reinforced Reasoning: Zhangquan Chen,

Xufang Luo,

Dongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Chen_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhangquan and Luo, Xufang and Li, Dongsheng},
  title     = {{VisRL}: Intention-Driven Visual Perception via Reinforced Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2545--2555}
}
PINO: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups: Sakuya Ota,

Qing Yu,

Kent Fujiwara,

Satoshi Ikehata,

Ikuro Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ota_2025_ICCV,
  author    = {Ota, Sakuya and Yu, Qing and Fujiwara, Kent and Ikehata, Satoshi and Sato, Ikuro},
  title     = {{PINO}: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10676--10685}
}
MagicDrive-V2: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control: Ruiyuan Gao,

Kai Chen,

Bo Xiao,

Lanqing Hong,

Zhenguo Li,

Qiang Xu; [pdf] [supp]
[bibtex]
% NOTE(review): key Gao_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Ruiyuan and Chen, Kai and Xiao, Bo and Hong, Lanqing and Li, Zhenguo and Xu, Qiang},
  title     = {{MagicDrive-V2}: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28135--28144}
}
Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse 3D Gaussian Sharing: Tianyu Hong,

Xiaobo Zhou,

Wenkai Hu,

Qi Xie,

Zhihui Ke,

Tie Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Tianyu and Zhou, Xiaobo and Hu, Wenkai and Xie, Qi and Ke, Zhihui and Qiu, Tie},
  title     = {Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse {3D} {Gaussian} Sharing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28622--28631}
}
Category-Specific Selective Feature Enhancement for Long-Tailed Multi-Label Image Classification: Ruiqi Du,

Xu Tang,

Xiangrong Zhang,

Jingjing Ma; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Ruiqi and Tang, Xu and Zhang, Xiangrong and Ma, Jingjing},
  title     = {Category-Specific Selective Feature Enhancement for Long-Tailed Multi-Label Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3757--3766}
}
Sparse-Dense Side-Tuner for efficient Video Temporal Grounding: David Pujol-Perich,

Sergio Escalera,

Albert Clapés; [pdf] [supp]
[bibtex]
@InProceedings{Pujol-Perich_2025_ICCV,
  author    = {Pujol-Perich, David and Escalera, Sergio and Clap{\'e}s, Albert},
  title     = {Sparse-Dense Side-Tuner for efficient Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21515--21524}
}
Corvid: Improving Multimodal Large Language Models Towards Chain-of-Thought Reasoning: Jingjing Jiang,

Chao Ma,

Xurui Song,

Hanwang Zhang,

Jun Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Jingjing and Ma, Chao and Song, Xurui and Zhang, Hanwang and Luo, Jun},
  title     = {{Corvid}: Improving Multimodal Large Language Models Towards Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3034--3046}
}
CompCap: Improving Multimodal Large Language Models with Composite Captions: Xiaohui Chen,

Satya Narayan Shukla,

Mahmoud Azab,

Aashu Singh,

Qifan Wang,

David Yang,

ShengYun Peng,

Hanchao Yu,

Shen Yan,

Xuewen Zhang,

Baosheng He; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Chen_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaohui and Shukla, Satya Narayan and Azab, Mahmoud and Singh, Aashu and Wang, Qifan and Yang, David and Peng, ShengYun and Yu, Hanchao and Yan, Shen and Zhang, Xuewen and He, Baosheng},
  title     = {{CompCap}: Improving Multimodal Large Language Models with Composite Captions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23582--23592}
}
COSMO: Combination of Selective Memorization for Low-cost Vision-and-Language Navigation: Siqi Zhang,

Yanyuan Qiao,

Qunbo Wang,

Zike Yan,

Qi Wu,

Zhihua Wei,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Siqi and Qiao, Yanyuan and Wang, Qunbo and Yan, Zike and Wu, Qi and Wei, Zhihua and Liu, Jing},
  title     = {{COSMO}: Combination of Selective Memorization for Low-cost Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5511--5522}
}
STI-Bench: Are MLLMs Ready for Precise Spatial-Temporal World Understanding?: Yun Li,

Yiming Zhang,

Tao Lin,

Xiangrui Liu,

Wenxiao Cai,

Zheng Liu,

Bo Zhao; [pdf]
[bibtex]
% NOTE(review): key Li_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yun and Zhang, Yiming and Lin, Tao and Liu, Xiangrui and Cai, Wenxiao and Liu, Zheng and Zhao, Bo},
  title     = {{STI-Bench}: Are {MLLMs} Ready for Precise Spatial-Temporal World Understanding?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5622--5632}
}
Describe, Adapt and Combine: Empowering CLIP Encoders for Open-set 3D Object Retrieval: Zhichuan Wang,

Yang Zhou,

Zhe Liu,

Rui Yu,

Song Bai,

Yulong Wang,

Xinwei He,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhichuan and Zhou, Yang and Liu, Zhe and Yu, Rui and Bai, Song and Wang, Yulong and He, Xinwei and Bai, Xiang},
  title     = {Describe, Adapt and Combine: Empowering {CLIP} Encoders for Open-set {3D} Object Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21026--21036}
}
Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions: Tommaso Galliena,

Tommaso Apicella,

Stefano Rosa,

Pietro Morerio,

Alessio Del Bue,

Lorenzo Natale; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galliena_2025_ICCV,
  author    = {Galliena, Tommaso and Apicella, Tommaso and Rosa, Stefano and Morerio, Pietro and Del Bue, Alessio and Natale, Lorenzo},
  title     = {Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24370--24379}
}
CAD-Assistant: Tool-Augmented VLLMs as Generic CAD Task Solvers: Dimitrios Mallis,

Ahmet Serda Karadeniz,

Sebastian Cavada,

Danila Rukhovich,

Niki Foteinopoulou,

Kseniya Cherenkova,

Anis Kacem,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Mallis_2025_ICCV,
  author    = {Mallis, Dimitrios and Karadeniz, Ahmet Serda and Cavada, Sebastian and Rukhovich, Danila and Foteinopoulou, Niki and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila},
  title     = {{CAD-Assistant}: Tool-Augmented {VLLMs} as Generic {CAD} Task Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7284--7294}
}
Fast Image Super-Resolution via Consistency Rectified Flow: Jiaqi Xu,

Wenbo Li,

Haoze Sun,

Fan Li,

Zhixin Wang,

Long Peng,

Jingjing Ren,

Haoran Yang,

Xiaowei Hu,

Renjing Pei,

Pheng-Ann Heng; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiaqi and Li, Wenbo and Sun, Haoze and Li, Fan and Wang, Zhixin and Peng, Long and Ren, Jingjing and Yang, Haoran and Hu, Xiaowei and Pei, Renjing and Heng, Pheng-Ann},
  title     = {Fast Image Super-Resolution via Consistency Rectified Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11755--11765}
}
Adapt Foundational Segmentation Models with Heterogeneous Searching Space: Li Yi,

Jie Hu,

Songan Zhang,

Guannan Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Li and Hu, Jie and Zhang, Songan and Jiang, Guannan},
  title     = {Adapt Foundational Segmentation Models with Heterogeneous Searching Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23364--23373}
}
Adversarial Exploitation of Data Diversity Improves Visual Localization: Sihang Li,

Siqi Tan,

Bowen Chang,

Jing Zhang,

Chen Feng,

Yiming Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Sihang and Tan, Siqi and Chang, Bowen and Zhang, Jing and Feng, Chen and Li, Yiming},
  title     = {Adversarial Exploitation of Data Diversity Improves Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26848--26858}
}
Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On: Delong Zhang,

Qiwei Huang,

Yang Sun,

Yuanliu Liu,

Wei-Shi Zheng,

Pengfei Xiong,

Wei Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Delong and Huang, Qiwei and Sun, Yang and Liu, Yuanliu and Zheng, Wei-Shi and Xiong, Pengfei and Zhang, Wei},
  title     = {Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18736--18745}
}
BoxDreamer: Dreaming Box Corners for Generalizable Object Pose Estimation: Yuanhong Yu,

Xingyi He,

Chen Zhao,

Junhao Yu,

Jiaqi Yang,

Ruizhen Hu,

Yujun Shen,

Xing Zhu,

Xiaowei Zhou,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yuanhong and He, Xingyi and Zhao, Chen and Yu, Junhao and Yang, Jiaqi and Hu, Ruizhen and Shen, Yujun and Zhu, Xing and Zhou, Xiaowei and Peng, Sida},
  title     = {{BoxDreamer}: Dreaming Box Corners for Generalizable Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9374--9384}
}
LawDIS: Language-Window-based Controllable Dichotomous Image Segmentation: Xinyu Yan,

Meijun Sun,

Ge-Peng Ji,

Fahad Shahbaz Khan,

Salman Khan,

Deng-Ping Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Xinyu and Sun, Meijun and Ji, Ge-Peng and Khan, Fahad Shahbaz and Khan, Salman and Fan, Deng-Ping},
  title     = {{LawDIS}: Language-Window-based Controllable Dichotomous Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23902--23911}
}
3DGraphLLM: Combining Semantic Graphs and Large Language Models for 3D Scene Understanding: Tatiana Zemskova,

Dmitry Yudin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zemskova_2025_ICCV,
  author    = {Zemskova, Tatiana and Yudin, Dmitry},
  title     = {{3DGraphLLM}: Combining Semantic Graphs and Large Language Models for {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8885--8895}
}
FineMotion: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing: Bizhu Wu,

Jinheng Xie,

Meidan Ding,

Zhe Kong,

Jianfeng Ren,

Ruibin Bai,

Rong Qu,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Bizhu and Xie, Jinheng and Ding, Meidan and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin},
  title     = {{FineMotion}: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13837--13846}
}
MC-Bench: A Benchmark for Multi-Context Visual Grounding in the Era of MLLMs: Yunqiu Xu,

Linchao Zhu,

Yi Yang; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yunqiu and Zhu, Linchao and Yang, Yi},
  title     = {{MC-Bench}: A Benchmark for Multi-Context Visual Grounding in the Era of {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17675--17687}
}
What Makes for Text to 360-degree Panorama Generation with Stable Diffusion?: Jinhong Ni,

Chang-Bin Zhang,

Qiang Zhang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Jinhong and Zhang, Chang-Bin and Zhang, Qiang and Zhang, Jing},
  title     = {What Makes for Text to 360-degree Panorama Generation with {Stable Diffusion}?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16555--16564}
}
CHORDS: Diffusion Sampling Accelerator with Multi-core Hierarchical ODE Solvers: Jiaqi Han,

Haotian Ye,

Puheng Li,

Minkai Xu,

James Zou,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Jiaqi and Ye, Haotian and Li, Puheng and Xu, Minkai and Zou, James and Ermon, Stefano},
  title     = {{CHORDS}: Diffusion Sampling Accelerator with Multi-core Hierarchical {ODE} Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19386--19395}
}
HERMES: temporal-coHERent long-forM understanding with Episodes and Semantics: Gueter Josmy Faure,

Jia-Fong Yeh,

Min-Hung Chen,

Hung-Ting Su,

Shang-Hong Lai,

Winston H. Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Faure_2025_ICCV,
  author    = {Faure, Gueter Josmy and Yeh, Jia-Fong and Chen, Min-Hung and Su, Hung-Ting and Lai, Shang-Hong and Hsu, Winston H.},
  title     = {{HERMES}: {temporal-coHERent} {long-forM} understanding with {Episodes} and {Semantics}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22911--22921}
}
CoralSRT: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance: Ziqiang Zheng,

Yuk-Kwan Wong,

Binh-Son Hua,

Jianbo Shi,

Sai-Kit Yeung; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Ziqiang and Wong, Yuk-Kwan and Hua, Binh-Son and Shi, Jianbo and Yeung, Sai-Kit},
  title     = {{CoralSRT}: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19967--19977}
}
VGMamba: Attribute-to-Location Clue Reasoning for Quantity-Agnostic 3D Visual Grounding: Yihang Zhu,

Jinhao Zhang,

Yuxuan Wang,

Aming Wu,

Cheng Deng; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yihang and Zhang, Jinhao and Wang, Yuxuan and Wu, Aming and Deng, Cheng},
  title     = {{VGMamba}: Attribute-to-Location Clue Reasoning for Quantity-Agnostic {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5295--5304}
}
Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer: Qingyu Shi,

Jianzong Wu,

Jinbin Bai,

Jiangning Zhang,

Lu Qi,

Yunhai Tong,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Shi_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Qingyu and Wu, Jianzong and Bai, Jinbin and Zhang, Jiangning and Qi, Lu and Tong, Yunhai and Li, Xiangtai},
  title     = {Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10995--11005}
}
MaskControl: Spatio-Temporal Control for Masked Motion Synthesis: Ekkasit Pinyoanuntapong,

Muhammad Saleem,

Korrawe Karunratanakul,

Pu Wang,

Hongfei Xue,

Chen Chen,

Chuan Guo,

Junli Cao,

Jian Ren,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pinyoanuntapong_2025_ICCV,
  author    = {Pinyoanuntapong, Ekkasit and Saleem, Muhammad and Karunratanakul, Korrawe and Wang, Pu and Xue, Hongfei and Chen, Chen and Guo, Chuan and Cao, Junli and Ren, Jian and Tulyakov, Sergey},
  title     = {{MaskControl}: Spatio-Temporal Control for Masked Motion Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9955--9965}
}
Evidential Knowledge Distillation: Liangyu Xiang,

Junyu Gao,

Changsheng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Liangyu and Gao, Junyu and Xu, Changsheng},
  title     = {Evidential Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2814--2824}
}
Straighten Viscous Rectified Flow via Noise Optimization: Jimin Dai,

Jiexi Yan,

Jian Yang,

Lei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Jimin and Yan, Jiexi and Yang, Jian and Luo, Lei},
  title     = {Straighten Viscous Rectified Flow via Noise Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15005--15014}
}
A Framework for Double-Blind Federated Adaptation of Foundation Models: Nurbek Tastan,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tastan_2025_ICCV,
  author    = {Tastan, Nurbek and Nandakumar, Karthik},
  title     = {A Framework for Double-Blind Federated Adaptation of Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {923--933}
}
Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization: Wang Liu,

Wei Gao; [pdf]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Wang and Gao, Wei},
  title     = {Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26055--26064}
}
B-VLLM: A Vision Large Language Model with Balanced Spatio-Temporal Tokens: Zhuqiang Lu,

Zhenfei Yin,

Mengwei He,

Zhihui Wang,

Zicheng Liu,

Zhiyong Wang,

Kun Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Zhuqiang and Yin, Zhenfei and He, Mengwei and Wang, Zhihui and Liu, Zicheng and Wang, Zhiyong and Hu, Kun},
  title     = {{B-VLLM}: A Vision Large Language Model with Balanced Spatio-Temporal Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24549--24558}
}
ImHead: A Large-scale Implicit Morphable Model for Localized Head Modeling: Rolandos Alexandros Potamias,

Stathis Galanakis,

Jiankang Deng,

Athanasios Papaioannou,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Potamias_2025_ICCV,
  author    = {Potamias, Rolandos Alexandros and Galanakis, Stathis and Deng, Jiankang and Papaioannou, Athanasios and Zafeiriou, Stefanos},
  title     = {{ImHead}: A Large-scale Implicit Morphable Model for Localized Head Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10196--10206}
}
SL2A-INR: Single-Layer Learnable Activation for Implicit Neural Representation: Reza Rezaeian,

Moein Heidari,

Reza Azad,

Dorit Merhof,

Hamid Soltanian-Zadeh,

Ilker Hacihaliloglu; [pdf] [supp]
[bibtex]
@InProceedings{Rezaeian_2025_ICCV,
  author    = {Rezaeian, Reza and Heidari, Moein and Azad, Reza and Merhof, Dorit and Soltanian-Zadeh, Hamid and Hacihaliloglu, Ilker},
  title     = {{SL2A-INR}: Single-Layer Learnable Activation for Implicit Neural Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26065--26074}
}
Authentic 4D Driving Simulation with a Video Generation Model: Lening Wang,

Wenzhao Zheng,

Dalong Du,

Yunpeng Zhang,

Yilong Ren,

Han Jiang,

Zhiyong Cui,

Haiyang Yu,

Jie Zhou,

Shanghang Zhang; [pdf]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Lening and Zheng, Wenzhao and Du, Dalong and Zhang, Yunpeng and Ren, Yilong and Jiang, Han and Cui, Zhiyong and Yu, Haiyang and Zhou, Jie and Zhang, Shanghang},
  title     = {Authentic {4D} Driving Simulation with a Video Generation Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28892--28902}
}
Partial Forward Blocking: A Novel Data Pruning Paradigm for Lossless Training Acceleration: Dongyue Wu,

Zilin Guo,

Jialong Zuo,

Nong Sang,

Changxin Gao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Dongyue and Guo, Zilin and Zuo, Jialong and Sang, Nong and Gao, Changxin},
  title     = {Partial Forward Blocking: A Novel Data Pruning Paradigm for Lossless Training Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {319--328}
}
FedPall: Prototype-based Adversarial and Collaborative Learning for Federated Learning with Feature Drift: Yong Zhang,

Feng Liang,

Guanghu Yuan,

Min Yang,

Chengming Li,

Xiping Hu; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yong and Liang, Feng and Yuan, Guanghu and Yang, Min and Li, Chengming and Hu, Xiping},
  title     = {{FedPall}: Prototype-based Adversarial and Collaborative Learning for Federated Learning with Feature Drift},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3111--3120}
}
From Easy to Hard: Progressive Active Learning Framework for Infrared Small Target Detection with Single Point Supervision: Chuang Yu,

Jinmiao Zhao,

Yunpeng Liu,

Sicheng Zhao,

Yimian Dai,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Chuang and Zhao, Jinmiao and Liu, Yunpeng and Zhao, Sicheng and Dai, Yimian and Yue, Xiangyu},
  title     = {From Easy to Hard: Progressive Active Learning Framework for Infrared Small Target Detection with Single Point Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2588--2598}
}
Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset: Ruofei Wang,

Peiqi Duan,

Boxin Shi,

Renjie Wan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruofei and Duan, Peiqi and Shi, Boxin and Wan, Renjie},
  title     = {Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10141--10150}
}
A Constrained Optimization Approach for Gaussian Splatting from Coarsely-posed Images and Noisy Lidar Point Clouds: Jizong Peng,

Tze Ho Elden Tse,

Kai Xu,

Wenchao Gao,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Jizong and Tse, Tze Ho Elden and Xu, Kai and Gao, Wenchao and Yao, Angela},
  title     = {A Constrained Optimization Approach for {Gaussian} Splatting from Coarsely-posed Images and Noisy Lidar Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2961--2970}
}
BabyVLM: Data-Efficient Pretraining of VLMs Inspired by Infant Learning: Shengao Wang,

Arjun Chandra,

Aoming Liu,

Venkatesh Saligrama,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shengao and Chandra, Arjun and Liu, Aoming and Saligrama, Venkatesh and Gong, Boqing},
  title     = {{BabyVLM}: Data-Efficient Pretraining of {VLMs} Inspired by Infant Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1380--1390}
}
Efficient Concertormer for Image Deblurring and Beyond: Pin-Hung Kuo,

Jinshan Pan,

Shao-Yi Chien,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuo_2025_ICCV,
  author    = {Kuo, Pin-Hung and Pan, Jinshan and Chien, Shao-Yi and Yang, Ming-Hsuan},
  title     = {Efficient {Concertormer} for Image Deblurring and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14665--14675}
}
Curve-Aware Gaussian Splatting for 3D Parametric Curve Reconstruction: Zhirui Gao,

Renjiao Yi,

Yaqiao Dai,

Xuening Zhu,

Wei Chen,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Gao_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhirui and Yi, Renjiao and Dai, Yaqiao and Zhu, Xuening and Chen, Wei and Zhu, Chenyang and Xu, Kai},
  title     = {Curve-Aware {Gaussian} Splatting for {3D} Parametric Curve Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27531--27541}
}
Sim-DETR: Unlock DETR for Temporal Sentence Grounding: Jiajin Tang,

Zhengxuan Wei,

Yuchen Zhu,

Cheng Shi,

Guanbin Li,

Liang Lin,

Sibei Yang; [pdf]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiajin and Wei, Zhengxuan and Zhu, Yuchen and Shi, Cheng and Li, Guanbin and Lin, Liang and Yang, Sibei},
  title     = {{Sim-DETR}: Unlock {DETR} for Temporal Sentence Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22760--22771}
}
DynamicID: Zero-Shot Multi-ID Image Personalization with Flexible Facial Editability: Xirui Hu,

Jiahao Wang,

Hao Chen,

Weizhan Zhang,

Benqi Wang,

Yikun Li,

Haishun Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xirui and Wang, Jiahao and Chen, Hao and Zhang, Weizhan and Wang, Benqi and Li, Yikun and Nan, Haishun},
  title     = {{DynamicID}: Zero-Shot {Multi-ID} Image Personalization with Flexible Facial Editability},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10549--10559}
}
Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks: Hao Huang,

Shuaihang Yuan,

Geeta Chandra Raju Bethala,

Congcong Wen,

Anthony Tzes,

Yi Fang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Huang_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hao and Yuan, Shuaihang and Bethala, Geeta Chandra Raju and Wen, Congcong and Tzes, Anthony and Fang, Yi},
  title     = {Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12349--12359}
}
METEOR: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models: Yuchen Liu,

Yaoming Wang,

Bowen Shi,

Xiaopeng Zhang,

Wenrui Dai,

Chenglin Li,

Hongkai Xiong,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuchen and Wang, Yaoming and Shi, Bowen and Zhang, Xiaopeng and Dai, Wenrui and Li, Chenglin and Xiong, Hongkai and Tian, Qi},
  title     = {{METEOR}: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21492--21504}
}
Contact-Aware Amodal Completion for Human-Object Interaction via Multi-Regional Inpainting: Seunggeun Chi,

Enna Sachdeva,

Pin-Hao Huang,

Kwonjoon Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chi_2025_ICCV,
  author    = {Chi, Seunggeun and Sachdeva, Enna and Huang, Pin-Hao and Lee, Kwonjoon},
  title     = {Contact-Aware Amodal Completion for Human-Object Interaction via Multi-Regional Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9487--9496}
}
SeqGrowGraph: Learning Lane Topology as a Chain of Graph Expansions: Mengwei Xie,

Shuang Zeng,

Xinyuan Chang,

Xinran Liu,

Zheng Pan,

Mu Xu,

Xing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Mengwei and Zeng, Shuang and Chang, Xinyuan and Liu, Xinran and Pan, Zheng and Xu, Mu and Wei, Xing},
  title     = {{SeqGrowGraph}: Learning Lane Topology as a Chain of Graph Expansions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27166--27175}
}
Ultra-Precision 6DoF Pose Estimation Using 2-D Interpolated Discrete Fourier Transform: Guowei Shi,

Zian Mao,

Peisen Huang; [pdf] [supp]
[bibtex]
% NOTE(review): key Shi_2025_ICCV is reused by other entries in this listing; disambiguate before loading them into one .bib database.
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Guowei and Mao, Zian and Huang, Peisen},
  title     = {Ultra-Precision {6DoF} Pose Estimation Using {2-D} Interpolated Discrete {Fourier} Transform},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5802--5810}
}
Neural Compression for 3D Geometry Sets: Siyu Ren,

Junhui Hou,

Weiyao Lin,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Siyu and Hou, Junhui and Lin, Weiyao and Wang, Wenping},
  title     = {Neural Compression for {3D} Geometry Sets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25294--25304},
}
Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling: Wenting Luan,

Siqi Lu,

Yongbin Zheng,

Wanying Xu,

Lang Nie,

Zongtan Zhou,

Kang Liao; [pdf] [supp]
[bibtex]
@InProceedings{Luan_2025_ICCV,
  author    = {Luan, Wenting and Lu, Siqi and Zheng, Yongbin and Xu, Wanying and Nie, Lang and Zhou, Zongtan and Liao, Kang},
  title     = {Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25529--25538},
}
ASGS: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching: Yuxuan Yuan,

Luyao Tang,

Yixin Chen,

Chaoqi Chen,

Yue Huang,

Xinghao Ding; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Yuxuan and Tang, Luyao and Chen, Yixin and Chen, Chaoqi and Huang, Yue and Ding, Xinghao},
  title     = {{ASGS}: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20911--20921},
}
Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation: Xiaolong Xu,

Lei Zhang,

Jiayi Li,

Lituan Wang,

Yifan Guan,

Yu Yan,

Leyi Zhang,

Hao Song; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiaolong and Zhang, Lei and Li, Jiayi and Wang, Lituan and Guan, Yifan and Yan, Yu and Zhang, Leyi and Song, Hao},
  title     = {Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10775--10785},
}
UniConvNet: Expanding Effective Receptive Field while Maintaining Asymptotically Gaussian Distribution for ConvNets of Any Scale: Yuhao Wang,

Wei Xi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuhao and Xi, Wei},
  title     = {{UniConvNet}: Expanding Effective Receptive Field while Maintaining Asymptotically {Gaussian} Distribution for {ConvNets} of Any Scale},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20922--20933},
}
Denoising Token Prediction in Masked Autoregressive Models: Ting Yao,

Yehao Li,

Yingwei Pan,

Zhaofan Qiu,

Tao Mei; [pdf]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Ting and Li, Yehao and Pan, Yingwei and Qiu, Zhaofan and Mei, Tao},
  title     = {Denoising Token Prediction in Masked Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18024--18033},
}
Uncertainty-Aware Gradient Stabilization for Small Object Detection: Huixin Sun,

Yanjing Li,

Linlin Yang,

Xianbin Cao,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Huixin and Li, Yanjing and Yang, Linlin and Cao, Xianbin and Zhang, Baochang},
  title     = {Uncertainty-Aware Gradient Stabilization for Small Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8407--8417},
}
Radiant Foam: Real-Time Differentiable Ray Tracing: Shrisudhan Govindarajan,

Daniel Rebain,

Kwang Moo Yi,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Govindarajan_2025_ICCV,
  author    = {Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea},
  title     = {Radiant Foam: Real-Time Differentiable Ray Tracing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4135--4145},
}
Attention to the Burstiness in Visual Prompt Tuning!: Yuzhu Wang,

Manni Duan,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuzhu and Duan, Manni and Kong, Shu},
  title     = {Attention to the Burstiness in Visual Prompt Tuning!},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4253--4263},
}
BadVideo: Stealthy Backdoor Attack against Text-to-Video Generation: Ruotong Wang,

Mingli Zhu,

Jiarong Ou,

Rui Chen,

Xin Tao,

Pengfei Wan,

Baoyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruotong and Zhu, Mingli and Ou, Jiarong and Chen, Rui and Tao, Xin and Wan, Pengfei and Wu, Baoyuan},
  title     = {{BadVideo}: Stealthy Backdoor Attack against Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19075--19084},
}
Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression: Shiyu Qin,

Jinpeng Wang,

Yimin Zhou,

Bin Chen,

Tianci Luo,

Baoyi An,

Tao Dai,

Shu-Tao Xia,

Yaowei Wang; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Shiyu and Wang, Jinpeng and Zhou, Yimin and Chen, Bin and Luo, Tianci and An, Baoyi and Dai, Tao and Xia, Shu-Tao and Wang, Yaowei},
  title     = {Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15727--15736},
}
AIGI-Holmes: Towards Explainable and Generalizable AI-Generated Image Detection via Multimodal Large Language Models: Ziyin Zhou,

Yunpeng Luo,

Yuanchen Wu,

Ke Sun,

Jiayi Ji,

Ke Yan,

Shouhong Ding,

Xiaoshuai Sun,

Yunsheng Wu,

Rongrong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Ziyin and Luo, Yunpeng and Wu, Yuanchen and Sun, Ke and Ji, Jiayi and Yan, Ke and Ding, Shouhong and Sun, Xiaoshuai and Wu, Yunsheng and Ji, Rongrong},
  title     = {{AIGI-Holmes}: Towards Explainable and Generalizable {AI}-Generated Image Detection via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18746--18758},
}
TokensGen: Harnessing Condensed Tokens for Long Video Generation: Wenqi Ouyang,

Zeqi Xiao,

Danni Yang,

Yifan Zhou,

Shuai Yang,

Lei Yang,

Jianlou Si,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Wenqi and Xiao, Zeqi and Yang, Danni and Zhou, Yifan and Yang, Shuai and Yang, Lei and Si, Jianlou and Pan, Xingang},
  title     = {{TokensGen}: Harnessing Condensed Tokens for Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18197--18206},
}
Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement: Xin Shen,

Xinyu Wang,

Lei Shen,

Kaihao Zhang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Xin and Wang, Xinyu and Shen, Lei and Zhang, Kaihao and Yu, Xin},
  title     = {Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20647--20657},
}
R1-Onevision: Advancing Generalized Multimodal Reasoning through Cross-Modal Formalization: Yi Yang,

Xiaoxuan He,

Hongkun Pan,

Xiyan Jiang,

Yan Deng,

Xingtao Yang,

Haoyu Lu,

Dacheng Yin,

Fengyun Rao,

Minfeng Zhu,

Bo Zhang,

Wei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yi and He, Xiaoxuan and Pan, Hongkun and Jiang, Xiyan and Deng, Yan and Yang, Xingtao and Lu, Haoyu and Yin, Dacheng and Rao, Fengyun and Zhu, Minfeng and Zhang, Bo and Chen, Wei},
  title     = {{R1-Onevision}: Advancing Generalized Multimodal Reasoning through Cross-Modal Formalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2376--2385},
}
HIS-GPT: Towards 3D Human-In-Scene Multimodal Understanding: Jiahe Zhao,

Ruibing Hou,

Zejie Tian,

Hong Chang,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Jiahe and Hou, Ruibing and Tian, Zejie and Chang, Hong and Shan, Shiguang},
  title     = {{HIS-GPT}: Towards {3D} Human-In-Scene Multimodal Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4317--4327},
}
Learnable Logit Adjustment for Imbalanced Semi-Supervised Learning under Class Distribution Mismatch: Hyuck Lee,

Taemin Park,

Heeyoung Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyuck and Park, Taemin and Kim, Heeyoung},
  title     = {Learnable Logit Adjustment for Imbalanced Semi-Supervised Learning under Class Distribution Mismatch},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2664--2674},
}
Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion: Haoyang Chen,

Dongfang Sun,

Caoyuan Ma,

Shiqin Wang,

Kewei Zhang,

Zheng Wang,

Zhixiang Wang; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Haoyang and Sun, Dongfang and Ma, Caoyuan and Wang, Shiqin and Zhang, Kewei and Wang, Zheng and Wang, Zhixiang},
  title     = {Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17838--17847},
}
Learning Robust Image Watermarking with Lossless Cover Recovery: Jiale Chen,

Wei Wang,

Chongyang Shi,

Li Dong,

Xiping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiale and Wang, Wei and Shi, Chongyang and Dong, Li and Hu, Xiping},
  title     = {Learning Robust Image Watermarking with Lossless Cover Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15056--15065},
}
Enhancing Transformers Through Conditioned Embedded Tokens: Hemanth Saratchandran,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saratchandran_2025_ICCV,
  author    = {Saratchandran, Hemanth and Lucey, Simon},
  title     = {Enhancing Transformers Through Conditioned Embedded Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4786--4795},
}
Drawing Developmental Trajectory from Cortical Surface Reconstruction: Wenxuan Wu,

Ruowen Qu,

Zhongliang Liu,

Zhuoyan Dai,

Dongzi Shi,

Sijin Yu,

Tong Xiong,

Shiping Liu,

Xiangmin Xu,

Xiaofen Xing,

Xin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Wenxuan and Qu, Ruowen and Liu, Zhongliang and Dai, Zhuoyan and Shi, Dongzi and Yu, Sijin and Xiong, Tong and Liu, Shiping and Xu, Xiangmin and Xing, Xiaofen and Zhang, Xin},
  title     = {Drawing Developmental Trajectory from Cortical Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11026--11035},
}
CryoFastAR: Fast Cryo-EM Ab initio Reconstruction Made Easy: Jiakai Zhang,

Shouchen Zhou,

Haizhao Dai,

Xinhang Liu,

Peihao Wang,

Zhiwen Fan,

Yuan Pei,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiakai and Zhou, Shouchen and Dai, Haizhao and Liu, Xinhang and Wang, Peihao and Fan, Zhiwen and Pei, Yuan and Yu, Jingyi},
  title     = {{CryoFastAR}: Fast {Cryo-EM} Ab initio Reconstruction Made Easy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8462--8471},
}
3DGS-LM: Faster Gaussian-Splatting Optimization with Levenberg-Marquardt: Lukas Höllein,

Aljaž Božič,

Michael Zollhöfer,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Hollein_2025_ICCV,
  author    = {H{\"o}llein, Lukas and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Zollh{\"o}fer, Michael and Nie{\ss}ner, Matthias},
  title     = {{3DGS-LM}: Faster {Gaussian}-Splatting Optimization with {Levenberg-Marquardt}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26740--26750},
}
GauUpdate: New Object Insertion in 3D Gaussian Fields with Consistent Global Illumination: Chengwei Ren,

Fan Zhang,

Liangchao Xu,

Liang Pan,

Ziwei Liu,

Wenping Wang,

Xiao-Ping Zhang,

Yuan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Chengwei and Zhang, Fan and Xu, Liangchao and Pan, Liang and Liu, Ziwei and Wang, Wenping and Zhang, Xiao-Ping and Liu, Yuan},
  title     = {{GauUpdate}: New Object Insertion in {3D} {Gaussian} Fields with Consistent Global Illumination},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28653--28663},
}
OphCLIP: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining: Ming Hu,

Kun Yuan,

Yaling Shen,

Feilong Tang,

Xiaohao Xu,

Lin Zhou,

Wei Li,

Ying Chen,

Zhongxing Xu,

Zelin Peng,

Siyuan Yan,

Vinkle Srivastav,

Diping Song,

Tianbin Li,

Danli Shi,

Jin Ye,

Nicolas Padoy,

Nassir Navab,

Junjun He,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Ming and Yuan, Kun and Shen, Yaling and Tang, Feilong and Xu, Xiaohao and Zhou, Lin and Li, Wei and Chen, Ying and Xu, Zhongxing and Peng, Zelin and Yan, Siyuan and Srivastav, Vinkle and Song, Diping and Li, Tianbin and Shi, Danli and Ye, Jin and Padoy, Nicolas and Navab, Nassir and He, Junjun and Ge, Zongyuan},
  title     = {{OphCLIP}: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19838--19849},
}
Hints of Prompt: Enhancing Visual Representation for Multimodal LLMs in Autonomous Driving: Hao Zhou,

Zhanning Gao,

Zhili Chen,

Maosheng Ye,

Qifeng Chen,

Tongyi Cao,

Honggang Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hao and Gao, Zhanning and Chen, Zhili and Ye, Maosheng and Chen, Qifeng and Cao, Tongyi and Qi, Honggang},
  title     = {Hints of Prompt: Enhancing Visual Representation for Multimodal {LLMs} in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6165--6175},
}
IM360: Large-scale Indoor Mapping with 360 Cameras: Dongki Jung,

Jaehoon Choi,

Yonghan Lee,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Manocha, Dinesh},
  title     = {{IM360}: Large-scale Indoor Mapping with 360 Cameras},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29040--29050},
}
RegGS: Unposed Sparse Views Gaussian Splatting with 3DGS Registration: Chong Cheng,

Yu Hu,

Sicheng Yu,

Beizhen Zhao,

Zijian Wang,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Chong and Hu, Yu and Yu, Sicheng and Zhao, Beizhen and Wang, Zijian and Wang, Hao},
  title     = {{RegGS}: Unposed Sparse Views {Gaussian} Splatting with {3DGS} Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8100--8109},
}
SU-RGS: Relightable 3D Gaussian Splatting from Sparse Views under Unconstrained Illuminations: Qi Zhang,

Chi Huang,

Qian Zhang,

Nan Li,

Wei Feng; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qi and Huang, Chi and Zhang, Qian and Li, Nan and Feng, Wei},
  title     = {{SU-RGS}: Relightable {3D} {Gaussian} Splatting from Sparse Views under Unconstrained Illuminations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26859--26868},
}
Representation Shift: Unifying Token Compression with FlashAttention: Joonmyung Choi,

Sanghyeok Lee,

Byungoh Ko,

Eunseo Kim,

Jihyung Kil,

Hyunwoo J. Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Ko, Byungoh and Kim, Eunseo and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {Representation Shift: Unifying Token Compression with {FlashAttention}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20456--20466},
}
DGTalker: Disentangled Generative Latent Space Learning for Audio-Driven Gaussian Talking Heads: Xiaoxi Liang,

Yanbo Fan,

Qiya Yang,

Xuan Wang,

Wei Gao,

Ge Li; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiaoxi and Fan, Yanbo and Yang, Qiya and Wang, Xuan and Gao, Wei and Li, Ge},
  title     = {{DGTalker}: Disentangled Generative Latent Space Learning for Audio-Driven {Gaussian} Talking Heads},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11079--11088},
}
RALoc: Enhancing Outdoor LiDAR Localization via Rotation Awareness: Yuyang Yang,

Wen Li,

Sheng Ao,

Qingshan Xu,

Shangshu Yu,

Yu Guo,

Yin Zhou,

Siqi Shen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuyang and Li, Wen and Ao, Sheng and Xu, Qingshan and Yu, Shangshu and Guo, Yu and Zhou, Yin and Shen, Siqi and Wang, Cheng},
  title     = {{RALoc}: Enhancing Outdoor {LiDAR} Localization via Rotation Awareness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3304--3313},
}
EDiT: Efficient Diffusion Transformers with Linear Compressed Attention: Philipp Becker,

Abhinav Mehrotra,

Ruchika Chavhan,

Malcolm Chadwick,

Luca Morreale,

Mehdi Noroozi,

Alberto Gil C. P. Ramos,

Sourav Bhattacharya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Becker_2025_ICCV,
  author    = {Becker, Philipp and Mehrotra, Abhinav and Chavhan, Ruchika and Chadwick, Malcolm and Morreale, Luca and Noroozi, Mehdi and Gil C. P. Ramos, Alberto and Bhattacharya, Sourav},
  title     = {{EDiT}: Efficient Diffusion Transformers with Linear Compressed Attention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19608--19616},
}
GenHancer: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers: Shijie Ma,

Yuying Ge,

Teng Wang,

Yuxin Guo,

Yixiao Ge,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Shijie and Ge, Yuying and Wang, Teng and Guo, Yuxin and Ge, Yixiao and Shan, Ying},
  title     = {{GenHancer}: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24402--24412},
}
LLaVA-3D: A Simple yet Effective Pathway to Empowering LMMs with 3D Capabilities: Chenming Zhu,

Tai Wang,

Wenwei Zhang,

Jiangmiao Pang,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Chenming and Wang, Tai and Zhang, Wenwei and Pang, Jiangmiao and Liu, Xihui},
  title     = {{LLaVA-3D}: A Simple yet Effective Pathway to Empowering {LMMs} with {3D} Capabilities},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4295--4305},
}
Understanding Museum Exhibits using Vision-Language Reasoning: Ada-Astrid Balauca,

Sanjana Garai,

Stefan Balauca,

Rasesh Udayakumar Shetty,

Naitik Agrawal,

Dhwanil Subhashbhai Shah,

Yuqian Fu,

Xi Wang,

Kristina Toutanova,

Danda Pani Paudel,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Balauca_2025_ICCV,
  author    = {Balauca, Ada-Astrid and Garai, Sanjana and Balauca, Stefan and Shetty, Rasesh Udayakumar and Agrawal, Naitik and Shah, Dhwanil Subhashbhai and Fu, Yuqian and Wang, Xi and Toutanova, Kristina and Paudel, Danda Pani and Van Gool, Luc},
  title     = {Understanding Museum Exhibits using Vision-Language Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2227--2238},
}
MINERVA: Evaluating Complex Video Reasoning: Arsha Nagrani,

Sachit Menon,

Ahmet Iscen,

Shyamal Buch,

Ramin Mehran,

Nilpa Jha,

Anja Hauth,

Yukun Zhu,

Carl Vondrick,

Mikhail Sirotenko,

Cordelia Schmid,

Tobias Weyand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagrani_2025_ICCV,
  author    = {Nagrani, Arsha and Menon, Sachit and Iscen, Ahmet and Buch, Shyamal and Mehran, Ramin and Jha, Nilpa and Hauth, Anja and Zhu, Yukun and Vondrick, Carl and Sirotenko, Mikhail and Schmid, Cordelia and Weyand, Tobias},
  title     = {{MINERVA}: Evaluating Complex Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23968--23978},
}
Active Membership Inference Test (aMINT): Enhancing Model Auditability with Multi-Task Learning.: Daniel DeAlcala,

Aythami Morales,

Julian Fierrez,

Gonzalo Mancera,

Ruben Tolosana,

Javier Ortega-Garcia; [pdf] [arXiv]
[bibtex]
@InProceedings{DeAlcala_2025_ICCV,
  author    = {DeAlcala, Daniel and Morales, Aythami and Fierrez, Julian and Mancera, Gonzalo and Tolosana, Ruben and Ortega-Garcia, Javier},
  title     = {Active Membership Inference Test ({aMINT}): Enhancing Model Auditability with Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {647--656},
}
One Perturbation is Enough: On Generating Universal Adversarial Perturbations against Vision-Language Pre-training Models: Hao Fang,

Jiawei Kong,

Wenbo Yu,

Bin Chen,

Jiawei Li,

Hao Wu,

Shu-Tao Xia,

Ke Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Hao and Kong, Jiawei and Yu, Wenbo and Chen, Bin and Li, Jiawei and Wu, Hao and Xia, Shu-Tao and Xu, Ke},
  title     = {One Perturbation is Enough: On Generating Universal Adversarial Perturbations against Vision-Language Pre-training Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4090--4100},
}
When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack: Hanqing Liu,

Shouwei Ruan,

Yao Huang,

Shiji Zhao,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Hanqing and Ruan, Shouwei and Huang, Yao and Zhao, Shiji and Wei, Xingxing},
  title     = {When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10485--10495},
}
Exploring View Consistency for Scene-Adaptive Low-Light Light Field Image Enhancement: Shuo Zhang,

Chen Gao,

Youfang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shuo and Gao, Chen and Lin, Youfang},
  title     = {Exploring View Consistency for Scene-Adaptive Low-Light Light Field Image Enhancement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7340--7349},
}
DIA: The Adversarial Exposure of Deterministic Inversion in Diffusion Models: Seunghoo Hong,

Geonho Son,

Juhun Lee,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Seunghoo and Son, Geonho and Lee, Juhun and Woo, Simon S.},
  title     = {{DIA}: The Adversarial Exposure of Deterministic Inversion in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17994--18003},
}
Rectifying Magnitude Neglect in Linear Attention: Qihang Fan,

Huaibo Huang,

Yuang Ai,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Qihang and Huang, Huaibo and Ai, Yuang and He, Ran},
  title     = {Rectifying Magnitude Neglect in Linear Attention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21505--21514},
}
GEMeX: A Large-Scale, Groundable, and Explainable Medical VQA Benchmark for Chest X-ray Diagnosis: Bo Liu,

Ke Zou,

Li-Ming Zhan,

Zexin Lu,

Xiaoyu Dong,

Yidi Chen,

Chengqiang Xie,

Jiannong Cao,

Xiao-Ming Wu,

Huazhu Fu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bo and Zou, Ke and Zhan, Li-Ming and Lu, Zexin and Dong, Xiaoyu and Chen, Yidi and Xie, Chengqiang and Cao, Jiannong and Wu, Xiao-Ming and Fu, Huazhu},
  title     = {{GEMeX}: A Large-Scale, Groundable, and Explainable Medical {VQA} Benchmark for Chest {X-ray} Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21310--21320},
}
Learning to See Inside Opaque Liquid Containers using Speckle Vibrometry: Matan Kichler,

Shai Bagon,

Mark Sheinin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kichler_2025_ICCV,
  author    = {Kichler, Matan and Bagon, Shai and Sheinin, Mark},
  title     = {Learning to See Inside Opaque Liquid Containers using Speckle Vibrometry},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9466--9476},
}
HouseTour: A Virtual Real Estate A(I)gent: Ata Çelen,

Marc Pollefeys,

Daniel Barath,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Celen_2025_ICCV,
  author    = {{\c{C}}elen, Ata and Pollefeys, Marc and Barath, Daniel and Armeni, Iro},
  title     = {{HouseTour}: A Virtual Real Estate {A(I)gent}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17761--17771},
}
Debiased Teacher for Day-to-Night Domain Adaptive Object Detection: Yiming Cui,

Liang Li,

Haibing Yin,

Yuhan Gao,

Yaoqi Sun,

Chenggang Yan; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_ICCV,
  author    = {Cui, Yiming and Li, Liang and Yin, Haibing and Gao, Yuhan and Sun, Yaoqi and Yan, Chenggang},
  title     = {Debiased Teacher for Day-to-Night Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2577--2587},
}
Frequency-Aligned Knowledge Distillation for Lightweight Spatiotemporal Forecasting: Yuqi Li,

Chuanguang Yang,

Hansheng Zeng,

Zeyu Dong,

Zhulin An,

Yongjun Xu,

Yingli Tian,

Hao Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuqi and Yang, Chuanguang and Zeng, Hansheng and Dong, Zeyu and An, Zhulin and Xu, Yongjun and Tian, Yingli and Wu, Hao},
  title     = {Frequency-Aligned Knowledge Distillation for Lightweight Spatiotemporal Forecasting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7262--7272},
}
Colors See Colors Ignore: Clothes Changing ReID with Color Disentanglement: Priyank Pathak,

Yogesh S. Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2025_ICCV,
  author    = {Pathak, Priyank and Rawat, Yogesh S.},
  title     = {Colors See Colors Ignore: Clothes Changing {ReID} with Color Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16797--16807},
}
Image as an IMU: Estimating Camera Motion from a Single Motion-Blurred Image: Jerred Chen,

Ronald Clark; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jerred and Clark, Ronald},
  title     = {Image as an {IMU}: Estimating Camera Motion from a Single Motion-Blurred Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {90--99},
}
AnyI2V: Animating Any Conditional Image with Motion Control: Ziye Li,

Hao Luo,

Xincheng Shuai,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ziye and Luo, Hao and Shuai, Xincheng and Ding, Henghui},
  title     = {{AnyI2V}: Animating Any Conditional Image with Motion Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17302--17311},
}
GENMO: A GENeralist Model for Human MOtion: Jiefeng Li,

Jinkun Cao,

Haotian Zhang,

Davis Rempe,

Jan Kautz,

Umar Iqbal,

Ye Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiefeng and Cao, Jinkun and Zhang, Haotian and Rempe, Davis and Kautz, Jan and Iqbal, Umar and Yuan, Ye},
  title     = {{GENMO}: A {GENeralist} Model for Human {MOtion}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11766--11776},
}
M-Net: MRI Brain Tumor Sequential Segmentation Network via Mesh-Cast: Jiacheng Lu,

Hui Ding,

Shiyu Zhang,

Guoping Huo; [pdf]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping},
  title     = {{M-Net}: {MRI} Brain Tumor Sequential Segmentation Network via Mesh-Cast},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20116--20125},
}
Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning: Yafei Zhang,

Lingqi Kong,

Huafeng Li,

Jie Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yafei and Kong, Lingqi and Li, Huafeng and Wen, Jie},
  title     = {Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12659--12669},
}
Prompt-A-Video: Prompt Your Video Diffusion Model via Preference-Aligned LLM: Yatai Ji,

Jiacheng Zhang,

Jie Wu,

Shilong Zhang,

Shoufa Chen,

Chongjian Ge,

Peize Sun,

Weifeng Chen,

Wenqi Shao,

Xuefeng Xiao,

Weilin Huang,

Ping Luo; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yatai and Zhang, Jiacheng and Wu, Jie and Zhang, Shilong and Chen, Shoufa and Ge, Chongjian and Sun, Peize and Chen, Weifeng and Shao, Wenqi and Xiao, Xuefeng and Huang, Weilin and Luo, Ping},
  title     = {{Prompt-A-Video}: Prompt Your Video Diffusion Model via Preference-Aligned {LLM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18725--18735},
}
Diffusion Image Prior: Hamadi Chihaoui,

Paolo Favaro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chihaoui_2025_ICCV,
  author    = {Chihaoui, Hamadi and Favaro, Paolo},
  title     = {Diffusion Image Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24636--24644},
}
Constructing Ophthalmic MLLM for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning: Xinyao Liu,

Diping Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xinyao and Song, Diping},
  title     = {Constructing Ophthalmic {MLLM} for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21547--21556},
}
MemDistill: Distilling LiDAR Knowledge into Memory for Camera-Only 3D Object Detection: Donghyeon Kwon,

Youngseok Yoon,

Hyeongseok Son,

Suha Kwak; [pdf]
[bibtex]
@InProceedings{Kwon_2025_ICCV,
  author    = {Kwon, Donghyeon and Yoon, Youngseok and Son, Hyeongseok and Kwak, Suha},
  title     = {{MemDistill}: Distilling {LiDAR} Knowledge into Memory for Camera-Only {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6828--6838},
}
OpenRSD: Towards Open-prompts for Object Detection in Remote Sensing Images: Ziyue Huang,

Yongchao Feng,

Ziqi Liu,

Shuai Yang,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ziyue and Feng, Yongchao and Liu, Ziqi and Yang, Shuai and Liu, Qingjie and Wang, Yunhong},
  title     = {{OpenRSD}: Towards Open-prompts for Object Detection in Remote Sensing Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8384--8394},
}
Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization: Thomas Carr,

Depeng Xu,

Shuhan Yuan,

Aidong Lu; [pdf] [supp]
[bibtex]
@InProceedings{Carr_2025_ICCV,
  author    = {Carr, Thomas and Xu, Depeng and Yuan, Shuhan and Lu, Aidong},
  title     = {Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13162--13170},
}
FlashDepth: Real-time Streaming Video Depth Estimation at 2K Resolution: Gene Chou,

Wenqi Xian,

Guandao Yang,

Mohamed Abdelfattah,

Bharath Hariharan,

Noah Snavely,

Ning Yu,

Paul Debevec; [pdf] [arXiv]
[bibtex]
@InProceedings{Chou_2025_ICCV,
  author    = {Chou, Gene and Xian, Wenqi and Yang, Guandao and Abdelfattah, Mohamed and Hariharan, Bharath and Snavely, Noah and Yu, Ning and Debevec, Paul},
  title     = {{FlashDepth}: Real-time Streaming Video Depth Estimation at {2K} Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9638--9648},
}
Towards Performance Consistency in Multi-Level Model Collaboration: Qi Li,

Runpeng Yu,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Qi and Yu, Runpeng and Wang, Xinchao},
  title     = {Towards Performance Consistency in Multi-Level Model Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2567--2576},
}
Polarimetric Neural Field via Unified Complex-Valued Wave Representation: Chu Zhou,

Yixin Yang,

Junda Liao,

Heng Guo,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Chu and Yang, Yixin and Liao, Junda and Guo, Heng and Shi, Boxin and Sato, Imari},
  title     = {Polarimetric Neural Field via Unified Complex-Valued Wave Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25660--25669},
}
CLIP-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation: Jiannan Ge,

Lingxi Xie,

Hongtao Xie,

Pandeng Li,

Sun-Ao Liu,

Xiaopeng Zhang,

Qi Tian,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Jiannan and Xie, Lingxi and Xie, Hongtao and Li, Pandeng and Liu, Sun-Ao and Zhang, Xiaopeng and Tian, Qi and Zhang, Yongdong},
  title     = {{CLIP}-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24034--24044},
}
Where, What, Why: Towards Explainable Driver Attention Prediction: Yuchen Zhou,

Jiayu Tang,

Xiaoyan Xiao,

Yueyao Lin,

Linkai Liu,

Zipeng Guo,

Hao Fei,

Xiaobo Xia,

Chao Gou; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yuchen and Tang, Jiayu and Xiao, Xiaoyan and Lin, Yueyao and Liu, Linkai and Guo, Zipeng and Fei, Hao and Xia, Xiaobo and Gou, Chao},
  title     = {Where, What, Why: Towards Explainable Driver Attention Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2675--2685},
}
MeshAnything V2: Artist-Created Mesh Generation with Adjacent Mesh Tokenization: Yiwen Chen,

Yikai Wang,

Yihao Luo,

Zhengyi Wang,

Zilong Chen,

Jun Zhu,

Chi Zhang,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiwen and Wang, Yikai and Luo, Yihao and Wang, Zhengyi and Chen, Zilong and Zhu, Jun and Zhang, Chi and Lin, Guosheng},
  title     = {{MeshAnything} {V2}: Artist-Created Mesh Generation with Adjacent Mesh Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13922--13931},
}
Visual Intention Grounding for Egocentric Assistants: Pengzhan Sun,

Junbin Xiao,

Tze Ho Elden Tse,

Yicong Li,

Arjun Akula,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Pengzhan and Xiao, Junbin and Tse, Tze Ho Elden and Li, Yicong and Akula, Arjun and Yao, Angela},
  title     = {Visual Intention Grounding for Egocentric Assistants},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2512--2522},
}
Analyzing Finetuning Representation Shift for Multimodal LLMs Steering: Pegah Khayatan,

Mustafa Shukor,

Jayneel Parekh,

Arnaud Dapogny,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khayatan_2025_ICCV,
  author    = {Khayatan, Pegah and Shukor, Mustafa and Parekh, Jayneel and Dapogny, Arnaud and Cord, Matthieu},
  title     = {Analyzing Finetuning Representation Shift for Multimodal {LLMs} Steering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2206--2216},
}
TeethGenerator: A two-stage framework for paired pre- and post-orthodontic 3D dental data generation: Changsong Lei,

Yaqian Liang,

Shaofeng Wang,

Jiajia Dai,

Yong-Jin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Changsong and Liang, Yaqian and Wang, Shaofeng and Dai, Jiajia and Liu, Yong-Jin},
  title     = {{TeethGenerator}: A two-stage framework for paired pre- and post-orthodontic {3D} dental data generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25872--25881},
}
Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention: Shiwei Zhang,

Qi Zhou,

Wei Ke; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiwei and Zhou, Qi and Ke, Wei},
  title     = {Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21097--21106},
}
Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval: Dohwan Ko,

Ji Soo Lee,

Minhyuk Choi,

Zihang Meng,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2025_ICCV,
  author    = {Ko, Dohwan and Lee, Ji Soo and Choi, Minhyuk and Meng, Zihang and Kim, Hyunwoo J.},
  title     = {Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22263--22273},
}
TinyViM: Frequency Decoupling for Tiny Hybrid Vision Mamba: Xiaowen Ma,

Zhenliang Ni,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Xiaowen and Ni, Zhenliang and Chen, Xinghao},
  title     = {{TinyViM}: Frequency Decoupling for Tiny Hybrid Vision {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23519--23529},
}
Harnessing Massive Satellite Imagery with Efficient Masked Image Modeling: Fengxiang Wang,

Hongzhen Wang,

Di Wang,

Zonghao Guo,

Zhenyu Zhong,

Long Lan,

Wenjing Yang,

Jing Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Fengxiang and Wang, Hongzhen and Wang, Di and Guo, Zonghao and Zhong, Zhenyu and Lan, Long and Yang, Wenjing and Zhang, Jing},
  title     = {Harnessing Massive Satellite Imagery with Efficient Masked Image Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6935--6947},
}
Visual-Oriented Fine-Grained Knowledge Editing for MultiModal Large Language Models: Zhen Zeng,

Leijiang Gu,

Xun Yang,

Zhangling Duan,

Zenglin Shi,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Zhen and Gu, Leijiang and Yang, Xun and Duan, Zhangling and Shi, Zenglin and Wang, Meng},
  title     = {Visual-Oriented Fine-Grained Knowledge Editing for {MultiModal} Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2491--2500},
}
Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations: Hai Huang,

Yan Xia,

Sashuai Zhou,

Hanting Wang,

Shulei Wang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hai and Xia, Yan and Zhou, Sashuai and Wang, Hanting and Wang, Shulei and Zhao, Zhou},
  title     = {Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22488--22498},
}
A Recipe for Generating 3D Worlds from a Single Image: Katja Schwarz,

Denis Rozumny,

Samuel Rota Bulò,

Lorenzo Porzi,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Schwarz_2025_ICCV,
  author    = {Schwarz, Katja and Rozumny, Denis and Bul{\`o}, Samuel Rota and Porzi, Lorenzo and Kontschieder, Peter},
  title     = {A Recipe for Generating {3D} Worlds from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3520--3530},
}
MVGBench: a Comprehensive Benchmark for Multi-view Generation Models: Xianghui Xie,

Jan Eric Lessen,

Gerard Pons-Moll; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Xianghui and Lessen, Jan Eric and Pons-Moll, Gerard},
  title     = {{MVGBench}: a Comprehensive Benchmark for Multi-view Generation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8207--8218},
}
Fewer Denoising Steps or Cheaper Per-Step Inference: Towards Compute-Optimal Diffusion Model Deployment: Zhenbang Du,

Yonggan Fu,

Lifu Wang,

Jiayi Qian,

Xiao Luo,

Yingyan Celine Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Zhenbang and Fu, Yonggan and Wang, Lifu and Qian, Jiayi and Luo, Xiao and Lin, Yingyan Celine},
  title     = {Fewer Denoising Steps or Cheaper Per-Step Inference: Towards Compute-Optimal Diffusion Model Deployment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3001--3010},
}
Competitive Distillation: A Simple Learning Strategy for Improving Visual Classification: Daqian Shi,

Xiaolei Diao,

Xu Chen,

Cédric M John; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Daqian and Diao, Xiaolei and Chen, Xu and John, C{\'e}dric M},
  title     = {Competitive Distillation: A Simple Learning Strategy for Improving Visual Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2981--2990},
}
DynFaceRestore: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance: Huu-Phu Do,

Yu-Wei Chen,

Yi-Cheng Liao,

Chi-Wei Hsiao,

Han-Yang Wang,

Wei-Chen Chiu,

Ching-Chun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2025_ICCV,
  author    = {Do, Huu-Phu and Chen, Yu-Wei and Liao, Yi-Cheng and Hsiao, Chi-Wei and Wang, Han-Yang and Chiu, Wei-Chen and Huang, Ching-Chun},
  title     = {{DynFaceRestore}: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10432--10441},
}
LoRAverse: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models: Mert Sonmezer,

Matthew Zheng,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Sonmezer_2025_ICCV,
  author    = {Sonmezer, Mert and Zheng, Matthew and Yanardag, Pinar},
  title     = {{LoRAverse}: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17879--17888},
}
AllTracker: Efficient Dense Point Tracking at High Resolution: Adam W. Harley,

Yang You,

Xinglong Sun,

Yang Zheng,

Nikhil Raghuraman,

Yunqi Gu,

Sheldon Liang,

Wen-Hsuan Chu,

Achal Dave,

Suya You,

Rares Ambrus,

Katerina Fragkiadaki,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Harley_2025_ICCV,
  author    = {Harley, Adam W. and You, Yang and Sun, Xinglong and Zheng, Yang and Raghuraman, Nikhil and Gu, Yunqi and Liang, Sheldon and Chu, Wen-Hsuan and Dave, Achal and You, Suya and Ambrus, Rares and Fragkiadaki, Katerina and Guibas, Leonidas},
  title     = {{AllTracker}: Efficient Dense Point Tracking at High Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5253--5262},
}
Gaussian-based World Model: Gaussian Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction: Tuo Feng,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Tuo and Wang, Wenguan and Yang, Yi},
  title     = {{Gaussian}-based World Model: {Gaussian} Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25239--25249},
}
CombatVLA: An Efficient Vision-Language-Action Model for Combat Tasks in 3D Action Role-Playing Games: Peng Chen,

Pi Bu,

Yingyao Wang,

Xinyi Wang,

Ziming Wang,

Jie Guo,

Yingxiu Zhao,

Qi Zhu,

Jun Song,

Siran Yang,

Jiamang Wang,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Peng and Bu, Pi and Wang, Yingyao and Wang, Xinyi and Wang, Ziming and Guo, Jie and Zhao, Yingxiu and Zhu, Qi and Song, Jun and Yang, Siran and Wang, Jiamang and Zheng, Bo},
  title     = {{CombatVLA}: An Efficient Vision-Language-Action Model for Combat Tasks in {3D} Action Role-Playing Games},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10919--10928},
}
SketchSplat: 3D Edge Reconstruction via Differentiable Multi-view Sketch Splatting: Haiyang Ying,

Matthias Zwicker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Haiyang and Zwicker, Matthias},
  title     = {{SketchSplat}: {3D} Edge Reconstruction via Differentiable Multi-view Sketch Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25649--25659},
}
Social Debiasing for Fair Multi-modal LLMs: Harry Cheng,

Yangyang Guo,

Qingpei Guo,

Ming Yang,

Tian Gan,

Weili Guan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Harry and Guo, Yangyang and Guo, Qingpei and Yang, Ming and Gan, Tian and Guan, Weili and Nie, Liqiang},
  title     = {Social Debiasing for Fair Multi-modal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1740--1750},
}
DOGR: Towards Versatile Visual Document Grounding and Referring: Yinan Zhou,

Yuxin Chen,

Haokun Lin,

Yichen Wu,

Shuyu Yang,

Zhongang Qi,

Chen Ma,

Li Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yinan and Chen, Yuxin and Lin, Haokun and Wu, Yichen and Yang, Shuyu and Qi, Zhongang and Ma, Chen and Zhu, Li},
  title     = {{DOGR}: Towards Versatile Visual Document Grounding and Referring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3596--3606},
}
Benchmarking and Learning Multi-Dimensional Quality Evaluator for Text-to-3D Generation: Yujie Zhang,

Bingyang Cui,

Qi Yang,

Zhu Li,

Yiling Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yujie and Cui, Bingyang and Yang, Qi and Li, Zhu and Xu, Yiling},
  title     = {Benchmarking and Learning Multi-Dimensional Quality Evaluator for Text-to-{3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18563--18574},
}
Sat2City: 3D City Generation from A Single Satellite Image with Cascaded Latent Diffusion: Tongyan Hua,

Lutao Jiang,

Ying-Cong Chen,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_ICCV,
  author    = {Hua, Tongyan and Jiang, Lutao and Chen, Ying-Cong and Zhao, Wufan},
  title     = {{Sat2City}: {3D} City Generation from A Single Satellite Image with Cascaded Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27978--27988},
}
Hi3DGen: High-fidelity 3D Geometry Generation from Images via Normal Bridging: Chongjie Ye,

Yushuang Wu,

Ziteng Lu,

Jiahao Chang,

Xiaoyang Guo,

Jiaqing Zhou,

Hao Zhao,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Chongjie and Wu, Yushuang and Lu, Ziteng and Chang, Jiahao and Guo, Xiaoyang and Zhou, Jiaqing and Zhao, Hao and Han, Xiaoguang},
  title     = {{Hi3DGen}: High-fidelity {3D} Geometry Generation from Images via Normal Bridging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25050--25061},
}
SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning: Lin Zhang,

Xianfang Zeng,

Kangcong Li,

Gang Yu,

Tao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Lin and Zeng, Xianfang and Li, Kangcong and Yu, Gang and Chen, Tao},
  title     = {{SC-Captioner}: Improving Image Captioning with Self-Correction by Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23145--23155},
}
DreamDance: Animating Human Images by Enriching 3D Geometry Cues from 2D Poses: Yatian Pang,

Bin Zhu,

Bin Lin,

Mingzhe Zheng,

Francis E. H. Tay,

Ser-Nam Lim,

Harry Yang,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_ICCV,
  author    = {Pang, Yatian and Zhu, Bin and Lin, Bin and Zheng, Mingzhe and Tay, Francis E. H. and Lim, Ser-Nam and Yang, Harry and Yuan, Li},
  title     = {{DreamDance}: Animating Human Images by Enriching {3D} Geometry Cues from {2D} Poses},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14039--14050},
}
MBTI: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation: Jungwoo Huh,

Yeseung Park,

Seongjean Kim,

Jungsu Kim,

Sanghoon Lee; [pdf] [supp]
[bibtex]
@InProceedings{Huh_2025_ICCV,
  author    = {Huh, Jungwoo and Park, Yeseung and Kim, Seongjean and Kim, Jungsu and Lee, Sanghoon},
  title     = {{MBTI}: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11568--11578},
}
MixRI: Mixing Features of Reference Images for Novel Object Pose Estimation: Xinhang Liu,

Jiawei Shi,

Zheng Dang,

Yuchao Dai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xinhang and Shi, Jiawei and Dang, Zheng and Dai, Yuchao},
  title     = {{MixRI}: Mixing Features of Reference Images for Novel Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9024--9035},
}
p-AVAS: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?: Susan Liang,

Chao Huang,

Yunlong Tang,

Zeliang Zhang,

Chenliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Susan and Huang, Chao and Tang, Yunlong and Zhang, Zeliang and Xu, Chenliang},
  title     = {{p-AVAS}: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13942--13951},
}
Learning Precise Affordances from Egocentric Videos for Robotic Manipulation: Gen Li,

Nikolaos Tsagkas,

Jifei Song,

Ruaridh Mon-Williams,

Sethu Vijayakumar,

Kun Shao,

Laura Sevilla-Lara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Tsagkas, Nikolaos and Song, Jifei and Mon-Williams, Ruaridh and Vijayakumar, Sethu and Shao, Kun and Sevilla-Lara, Laura},
  title     = {Learning Precise Affordances from Egocentric Videos for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10581--10591},
}
VAFlow: Video-to-Audio Generation with Cross-Modality Flow Matching: Xihua Wang,

Xin Cheng,

Yuyue Wang,

Ruihua Song,

Yunfeng Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xihua and Cheng, Xin and Wang, Yuyue and Song, Ruihua and Wang, Yunfeng},
  title     = {{VAFlow}: Video-to-Audio Generation with Cross-Modality Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11777--11786},
}
HDR Image Generation via Gain Map Decomposed Diffusion: Yuanshen Guan,

Ruikang Xu,

Yinuo Liao,

Mingde Yao,

Lizhi Wang,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yuanshen and Xu, Ruikang and Liao, Yinuo and Yao, Mingde and Wang, Lizhi and Xiong, Zhiwei},
  title     = {{HDR} Image Generation via Gain Map Decomposed Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17536--17545},
}
DiffuMatch: Category-Agnostic Spectral Diffusion Priors for Robust Non-rigid Shape Matching: Emery Pierson,

Lei Li,

Angela Dai,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pierson_2025_ICCV,
  author    = {Pierson, Emery and Li, Lei and Dai, Angela and Ovsjanikov, Maks},
  title     = {{DiffuMatch}: Category-Agnostic Spectral Diffusion Priors for Robust Non-rigid Shape Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5745--5756},
}
MoGA: 3D Generative Avatar Prior for Monocular Gaussian Avatar Reconstruction: Zijian Dong,

Longteng Duan,

Jie Song,

Michael J. Black,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Zijian and Duan, Longteng and Song, Jie and Black, Michael J. and Geiger, Andreas},
  title     = {{MoGA}: {3D} Generative Avatar Prior for Monocular {Gaussian} Avatar Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13304--13314},
}
Aligning Moments in Time using Video Queries: Yogesh Kumar,

Uday Agarwal,

Manish Gupta,

Anand Mishra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Yogesh and Agarwal, Uday and Gupta, Manish and Mishra, Anand},
  title     = {Aligning Moments in Time using Video Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20215--20225},
}
StreamGS: Online Generalizable Gaussian Splatting Reconstruction for Unposed Image Streams: Yang Li,

Jinglu Wang,

Lei Chu,

Xiao Li,

Shiu-Hong Kao,

Ying-Cong Chen,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yang and Wang, Jinglu and Chu, Lei and Li, Xiao and Kao, Shiu-Hong and Chen, Ying-Cong and Lu, Yan},
  title     = {{StreamGS}: Online Generalizable {Gaussian} Splatting Reconstruction for Unposed Image Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25841--25850},
}
Adversarial Attention Perturbations for Large Object Detection Transformers: Zachary Yahn,

Selim Furkan Tekin,

Fatih Ilhan,

Sihao Hu,

Tiansheng Huang,

Yichang Xu,

Margaret Loper,

Ling Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yahn_2025_ICCV,
  author    = {Yahn, Zachary and Tekin, Selim Furkan and Ilhan, Fatih and Hu, Sihao and Huang, Tiansheng and Xu, Yichang and Loper, Margaret and Liu, Ling},
  title     = {Adversarial Attention Perturbations for Large Object Detection Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3184--3193},
}
Meta-Learning Dynamic Center Distance: Hard Sample Mining for Learning with Noisy Labels: Chenyu Mu,

Yijun Qu,

Jiexi Yan,

Erkun Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2025_ICCV,
  author    = {Mu, Chenyu and Qu, Yijun and Yan, Jiexi and Yang, Erkun and Deng, Cheng},
  title     = {Meta-Learning Dynamic Center Distance: Hard Sample Mining for Learning with Noisy Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {415--425},
}
Ensemble Foreground Management for Unsupervised Object Discovery: Ziling Wu,

Armaghan Moemeni,

Praminda Caleb-Solly; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Ziling and Moemeni, Armaghan and Caleb-Solly, Praminda},
  title     = {Ensemble Foreground Management for Unsupervised Object Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20268--20279},
}
RESCUE: Crowd Evacuation Simulation via Controlling SDM-United Characters: Xiaolin Liu,

Tianyi Zhou,

Hongbo Kang,

Jian Ma,

Ziwen Wang,

Jing Huang,

Wenguo Weng,

Yu-Kun Lai,

Kun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiaolin and Zhou, Tianyi and Kang, Hongbo and Ma, Jian and Wang, Ziwen and Huang, Jing and Weng, Wenguo and Lai, Yu-Kun and Li, Kun},
  title     = {{RESCUE}: Crowd Evacuation Simulation via Controlling {SDM}-United Characters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24955--24964},
}
Joint Diffusion Models in Continual Learning: Paweł Skierś,

Kamil Deja; [pdf] [supp]
[bibtex]
@InProceedings{Skiers_2025_ICCV,
  author    = {Skier{\'s}, Pawe{\l} and Deja, Kamil},
  title     = {Joint Diffusion Models in Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4380--4390},
}
MissRAG: Addressing the Missing Modality Challenge in Multimodal Large Language Models: Vittorio Pipoli,

Alessia Saporita,

Federico Bolelli,

Marcella Cornia,

Lorenzo Baraldi,

Costantino Grana,

Rita Cucchiara,

Elisa Ficarra; [pdf] [supp]
[bibtex]
@InProceedings{Pipoli_2025_ICCV,
  author    = {Pipoli, Vittorio and Saporita, Alessia and Bolelli, Federico and Cornia, Marcella and Baraldi, Lorenzo and Grana, Costantino and Cucchiara, Rita and Ficarra, Elisa},
  title     = {{MissRAG}: Addressing the Missing Modality Challenge in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3215--3224},
}
Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator: Ronglai Zuo,

Rolandos Alexandros Potamias,

Evangelos Ververas,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2025_ICCV,
  author    = {Zuo, Ronglai and Potamias, Rolandos Alexandros and Ververas, Evangelos and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23806--23816},
}
Test-Time Prompt Tuning for Zero-Shot Depth Completion: Chanhwi Jeong,

Inhwan Bae,

Jin-Hwi Park,

Hae-Gon Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Chanhwi and Bae, Inhwan and Park, Jin-Hwi and Jeon, Hae-Gon},
  title     = {Test-Time Prompt Tuning for Zero-Shot Depth Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9443--9454},
}
GUIOdyssey: A Comprehensive Dataset for Cross-App GUI Navigation on Mobile Devices: Quanfeng Lu,

Wenqi Shao,

Zitao Liu,

Lingxiao Du,

Fanqing Meng,

Boxuan Li,

Botong Chen,

Siyuan Huang,

Kaipeng Zhang,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Quanfeng and Shao, Wenqi and Liu, Zitao and Du, Lingxiao and Meng, Fanqing and Li, Boxuan and Chen, Botong and Huang, Siyuan and Zhang, Kaipeng and Luo, Ping},
  title     = {{GUIOdyssey}: A Comprehensive Dataset for Cross-App {GUI} Navigation on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22404--22414},
}
TOTP: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive Mamba Latent Diffusion: Ziyang Ren,

Ping Wei,

Shangqi Deng,

Haowen Tang,

Jiapeng Li,

Huan Li; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Ziyang and Wei, Ping and Deng, Shangqi and Tang, Haowen and Li, Jiapeng and Li, Huan},
  title     = {{TOTP}: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive {Mamba} Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26263--26272},
}
AstroLoc: Robust Space to Ground Image Localizer: Gabriele Berton,

Alex Stoken,

Carlo Masone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berton_2025_ICCV,
  author    = {Berton, Gabriele and Stoken, Alex and Masone, Carlo},
  title     = {{AstroLoc}: Robust Space to Ground Image Localizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5811--5820},
}
Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion: Jiwon Kim,

Pureum Kim,

SeonHwa Kim,

Soobin Park,

Eunju Cha,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jiwon and Kim, Pureum and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan},
  title     = {Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15491--15500},
}
Chimera: Improving Generalist Model with Domain-Specific Experts: Tianshuo Peng,

Mingsheng Li,

Jiakang Yuan,

Hongbin Zhou,

Renqiu Xia,

Renrui Zhang,

Lei Bai,

Song Mao,

Bin Wang,

Aojun Zhou,

Botian Shi,

Tao Chen,

Bo Zhang,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Tianshuo and Li, Mingsheng and Yuan, Jiakang and Zhou, Hongbin and Xia, Renqiu and Zhang, Renrui and Bai, Lei and Mao, Song and Wang, Bin and Zhou, Aojun and Shi, Botian and Chen, Tao and Zhang, Bo and Yue, Xiangyu},
  title     = {Chimera: Improving Generalist Model with Domain-Specific Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3011--3022},
}
EgoM2P: Egocentric Multimodal Multitask Pretraining: Gen Li,

Yutong Chen,

Yiqian Wu,

Kaifeng Zhao,

Marc Pollefeys,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Chen, Yutong and Wu, Yiqian and Zhao, Kaifeng and Pollefeys, Marc and Tang, Siyu},
  title     = {{EgoM2P}: Egocentric Multimodal Multitask Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10830--10843},
}
On Large Multimodal Models as Open-World Image Classifiers: Alessandro Conti,

Massimiliano Mancini,

Enrico Fini,

Yiming Wang,

Paolo Rota,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Conti_2025_ICCV,
  author    = {Conti, Alessandro and Mancini, Massimiliano and Fini, Enrico and Wang, Yiming and Rota, Paolo and Ricci, Elisa},
  title     = {On Large Multimodal Models as Open-World Image Classifiers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16388--16398},
}
Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections: Youwei Zhou,

Tianyang Xu,

Cong Wu,

Xiaojun Wu,

Josef Kittler; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Youwei and Xu, Tianyang and Wu, Cong and Wu, Xiaojun and Kittler, Josef},
  title     = {Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12648--12658},
}
Simultaneous Motion And Noise Estimation with Event Cameras: Shintaro Shiba,

Yoshimitsu Aoki,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiba_2025_ICCV,
  author    = {Shiba, Shintaro and Aoki, Yoshimitsu and Gallego, Guillermo},
  title     = {Simultaneous Motion And Noise Estimation with Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6959--6969},
}
Supercharging Floorplan Localization with Semantic Rays: Yuval Grader,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Grader_2025_ICCV,
  author    = {Grader, Yuval and Averbuch-Elor, Hadar},
  title     = {Supercharging Floorplan Localization with Semantic Rays},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27116--27125},
}
MDP3: A Training-free Approach for List-wise Frame Selection in Video-LLMs: Hui Sun,

Shiyin Lu,

Huanyu Wang,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang,

Ming Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Hui and Lu, Shiyin and Wang, Huanyu and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Li, Ming},
  title     = {{MDP3}: A Training-free Approach for List-wise Frame Selection in {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24090--24101},
}
General Compression Framework for Efficient Transformer Object Tracking: Lingyi Hong,

Jinglun Li,

Xinyu Zhou,

Shilin Yan,

Pinxue Guo,

Kaixun Jiang,

Zhaoyu Chen,

Shuyong Gao,

Runze Li,

Xingdong Sheng,

Wei Zhang,

Hong Lu,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Lingyi and Li, Jinglun and Zhou, Xinyu and Yan, Shilin and Guo, Pinxue and Jiang, Kaixun and Chen, Zhaoyu and Gao, Shuyong and Li, Runze and Sheng, Xingdong and Zhang, Wei and Lu, Hong and Zhang, Wenqiang},
  title     = {General Compression Framework for Efficient Transformer Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13427--13437},
}
Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts: Chiao-An Yang,

Kuan-Chuan Peng,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Peng, Kuan-Chuan and Yeh, Raymond A.},
  title     = {Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23419--23430},
}
CIARD: Cyclic Iterative Adversarial Robustness Distillation: Liming Lu,

Shuchao Pang,

Xu Zheng,

Xiang Gu,

Anan Du,

Yunhuai Liu,

Yongbin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Liming and Pang, Shuchao and Zheng, Xu and Gu, Xiang and Du, Anan and Liu, Yunhuai and Zhou, Yongbin},
  title     = {{CIARD}: Cyclic Iterative Adversarial Robustness Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {350--359},
}
DiffTell: A High-Quality Dataset for Describing Image Manipulation Changes: Zonglin Di,

Jing Shi,

Yifei Fan,

Hao Tan,

Alexander Black,

John Collomosse,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Di_2025_ICCV,
  author    = {Di, Zonglin and Shi, Jing and Fan, Yifei and Tan, Hao and Black, Alexander and Collomosse, John and Liu, Yang},
  title     = {{DiffTell}: A High-Quality Dataset for Describing Image Manipulation Changes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24580--24590},
}
A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions: Youliang Zhang,

Ronghui Li,

Yachao Zhang,

Liang Pan,

Jingbo Wang,

Yebin Liu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Youliang and Li, Ronghui and Zhang, Yachao and Pan, Liang and Wang, Jingbo and Liu, Yebin and Li, Xiu},
  title     = {A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13281--13292},
}
Local Dense Logit Relations for Enhanced Knowledge Distillation: Liuchi Xu,

Kang Liu,

Jinshuai Liu,

Lu Wang,

Lisheng Xu,

Jun Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Liuchi and Liu, Kang and Liu, Jinshuai and Wang, Lu and Xu, Lisheng and Cheng, Jun},
  title     = {Local Dense Logit Relations for Enhanced Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4539--4549},
}
Less is More: Empowering GUI Agent with Context-Aware Simplification: Gongwei Chen,

Xurui Zhou,

Rui Shao,

Yibo Lyu,

Kaiwen Zhou,

Shuai Wang,

Wentao Li,

Yinchuan Li,

Zhongang Qi,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Gongwei and Zhou, Xurui and Shao, Rui and Lyu, Yibo and Zhou, Kaiwen and Wang, Shuai and Li, Wentao and Li, Yinchuan and Qi, Zhongang and Nie, Liqiang},
  title     = {Less is More: Empowering {GUI} Agent with Context-Aware Simplification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5901--5911},
}
TrafficLoc: Localizing Traffic Surveillance Cameras in 3D Scenes: Yan Xia,

Yunxiang Lu,

Rui Song,

Oussema Dhaouadi,

João F. Henriques,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Yan and Lu, Yunxiang and Song, Rui and Dhaouadi, Oussema and Henriques, Jo{\~a}o F. and Cremers, Daniel},
  title     = {{TrafficLoc}: Localizing Traffic Surveillance Cameras in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28685--28695},
}
Detect Anything 3D in the Wild: Hanxue Zhang,

Haoran Jiang,

Qingsong Yao,

Yanan Sun,

Renrui Zhang,

Hao Zhao,

Hongyang Li,

Hongzi Zhu,

Zetong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hanxue and Jiang, Haoran and Yao, Qingsong and Sun, Yanan and Zhang, Renrui and Zhao, Hao and Li, Hongyang and Zhu, Hongzi and Yang, Zetong},
  title     = {Detect Anything {3D} in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5048--5059},
}
Failure Cases Are Better Learned But Boundary Says Sorry: Facilitating Smooth Perception Change for Accuracy-Robustness Trade-Off in Adversarial Training: Yanyun Wang,

Li Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yanyun and Liu, Li},
  title     = {Failure Cases Are Better Learned But Boundary Says Sorry: Facilitating Smooth Perception Change for Accuracy-Robustness Trade-Off in Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4691--4700},
}
CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection: Zhixin Cheng,

Jiacheng Deng,

Xinjun Li,

Xiaotian Yin,

Bohao Liao,

Baoqun Yin,

Wenfei Yang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zhixin and Deng, Jiacheng and Li, Xinjun and Yin, Xiaotian and Liao, Bohao and Yin, Baoqun and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{CA-I2P}: Channel-Adaptive Registration Network with Global Optimal Selection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27739--27749},
}
Exploiting Vision Language Model for Training-Free 3D Point Cloud OOD Detection via Graph Score Propagation: Tiankai Chen,

Yushu Li,

Adam Goodge,

Fei Teng,

Xulei Yang,

Tianrui Li,

Xun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Tiankai and Li, Yushu and Goodge, Adam and Teng, Fei and Yang, Xulei and Li, Tianrui and Xu, Xun},
  title     = {Exploiting Vision Language Model for Training-Free {3D} Point Cloud {OOD} Detection via Graph Score Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28797--28807},
}
Monocular Facial Appearance Capture in the Wild: Yingyan Xu,

Kate Gadola,

Prashanth Chandran,

Sebastian Weiss,

Markus Gross,

Gaspard Zoss,

Derek Bradley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yingyan and Gadola, Kate and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek},
  title     = {Monocular Facial Appearance Capture in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12078--12088},
}
Gaussian Variation Field Diffusion for High-fidelity Video-to-4D Synthesis: Bowen Zhang,

Sicheng Xu,

Chuxin Wang,

Jiaolong Yang,

Feng Zhao,

Dong Chen,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Bowen and Xu, Sicheng and Wang, Chuxin and Yang, Jiaolong and Zhao, Feng and Chen, Dong and Guo, Baining},
  title     = {Gaussian Variation Field Diffusion for High-fidelity {Video-to-4D} Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12502--12513},
}
Can3Tok: Canonical 3D Tokenization and Latent Modeling of Scene-Level 3D Gaussians: Quankai Gao,

Iliyan Georgiev,

Tuanfeng Y. Wang,

Krishna Kumar Singh,

Ulrich Neumann,

Jae Shin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Quankai and Georgiev, Iliyan and Wang, Tuanfeng Y. and Singh, Krishna Kumar and Neumann, Ulrich and Yoon, Jae Shin},
  title     = {{Can3Tok}: Canonical {3D} Tokenization and Latent Modeling of Scene-Level {3D} {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9320--9331},
}
EmotiCrafter: Text-to-Emotional-Image Generation based on Valence-Arousal Model: Shengqi Dang,

Yi He,

Long Ling,

Ziqing Qian,

Nanxuan Zhao,

Nan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_ICCV,
  author    = {Dang, Shengqi and He, Yi and Ling, Long and Qian, Ziqing and Zhao, Nanxuan and Cao, Nan},
  title     = {{EmotiCrafter}: Text-to-Emotional-Image Generation based on Valence-Arousal Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15218--15228},
}
HFD-Teacher: High-Frequency Depth Distillation from Depth Foundation Models for Enhanced Depth Completion: Zhiyuan Yang,

Anqi Cheng,

Haiyue Zhu,

Tianjiao Li,

Pey Yuen Tao,

Kezhi Mao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhiyuan and Cheng, Anqi and Zhu, Haiyue and Li, Tianjiao and Tao, Pey Yuen and Mao, Kezhi},
  title     = {{HFD-Teacher}: High-Frequency Depth Distillation from Depth Foundation Models for Enhanced Depth Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8994--9003},
}
EAMamba: Efficient All-Around Vision State Space Model for Image Restoration: Yu-Cheng Lin,

Yu-Syuan Xu,

Hao-Wei Chen,

Hsien-Kai Kuo,

Chun-Yi Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Yu-Cheng and Xu, Yu-Syuan and Chen, Hao-Wei and Kuo, Hsien-Kai and Lee, Chun-Yi},
  title     = {{EAMamba}: Efficient All-Around Vision State Space Model for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11708--11719},
}
AHCPTQ: Accurate and Hardware-Compatible Post-Training Quantization for Segment Anything Model: Wenlun Zhang,

Yunshan Zhong,

Shimpei Ando,

Kentaro Yoshioka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ando, Shimpei and Yoshioka, Kentaro},
  title     = {{AHCPTQ}: Accurate and Hardware-Compatible Post-Training Quantization for {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22383--22392},
}
WonderTurbo: Generating Interactive 3D World in 0.72 Seconds: Chaojun Ni,

Xiaofeng Wang,

Zheng Zhu,

Weijie Wang,

Haoyun Li,

Guosheng Zhao,

Jie Li,

Wenkang Qin,

Guan Huang,

Wenjun Mei; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Wang, Weijie and Li, Haoyun and Zhao, Guosheng and Li, Jie and Qin, Wenkang and Huang, Guan and Mei, Wenjun},
  title     = {{WonderTurbo}: Generating Interactive {3D} World in 0.72 Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27423--27434},
}
Learning Hierarchical Line Buffer for Image Processing: Jiacheng Li,

Feiran Li,

Daisuke Iso; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiacheng and Li, Feiran and Iso, Daisuke},
  title     = {Learning Hierarchical Line Buffer for Image Processing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11132--11141},
}
Looking in the Mirror: A Faithful Counterfactual Explanation Method for Interpreting Deep Image Classification Models: Townim Chowdhury,

Vu Minh Hieu Phan,

Kewen Liao,

Nanyu Dong,

Minh-Son To,

Anton van den Hengel,

Johan W. Verjans,

Zhibin Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Townim and Phan, Vu Minh Hieu and Liao, Kewen and Dong, Nanyu and To, Minh-Son and van den Hengel, Anton and Verjans, Johan W. and Liao, Zhibin},
  title     = {Looking in the Mirror: A Faithful Counterfactual Explanation Method for Interpreting Deep Image Classification Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2239--2249},
}
Inference-Time Diffusion Model Distillation: Geon Yeong Park,

Sang Wan Lee,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Geon Yeong and Lee, Sang Wan and Ye, Jong Chul},
  title     = {Inference-Time Diffusion Model Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4049--4058},
}
S3E: Self-Supervised State Estimation for Radar-Inertial System: Shengpeng Wang,

Yulong Xie,

Qing Liao,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shengpeng and Xie, Yulong and Liao, Qing and Wang, Wei},
  title     = {{S3E}: Self-Supervised State Estimation for Radar-Inertial System},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26686--26695},
}
Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors: Zheyuan Zhang,

Weihao Tang,

Hong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zheyuan and Tang, Weihao and Chen, Hong},
  title     = {Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12274--12283},
}
Target Bias Is All You Need: Zero-Shot Debiasing of Vision-Language Models with Bias Corpus: Taeuk Jang,

Hoin Jung,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, Taeuk and Jung, Hoin and Wang, Xiaoqian},
  title     = {Target Bias Is All You Need: Zero-Shot Debiasing of Vision-Language Models with Bias Corpus},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1935--1946},
}
Mastering Collaborative Multi-modal Data Selection: A Focus on Informativeness, Uniqueness, and Representativeness: Qifan Yu,

Zhebei Shen,

Zhongqi Yue,

Yang Wu,

Bosheng Qin,

Wenqiao Zhang,

Yunfei Li,

Juncheng Li,

Siliang Tang,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Qifan and Shen, Zhebei and Yue, Zhongqi and Wu, Yang and Qin, Bosheng and Zhang, Wenqiao and Li, Yunfei and Li, Juncheng and Tang, Siliang and Zhuang, Yueting},
  title     = {Mastering Collaborative Multi-modal Data Selection: A Focus on Informativeness, Uniqueness, and Representativeness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {155--165},
}
WIR3D: Visually-Informed and Geometry-Aware 3D Shape Abstraction: Richard Liu,

Daniel Fu,

Noah Tan,

Itai Lang,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Richard and Fu, Daniel and Tan, Noah and Lang, Itai and Hanocka, Rana},
  title     = {{WIR3D}: Visually-Informed and Geometry-Aware {3D} Shape Abstraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14810--14821},
}
XTrack: Multimodal Training Boosts RGB-X Video Object Trackers: Yuedong Tan,

Zongwei Wu,

Yuqian Fu,

Zhuyun Zhou,

Guolei Sun,

Eduard Zamfir,

Chao Ma,

Danda Paudel,

Luc Van Gool,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Yuedong and Wu, Zongwei and Fu, Yuqian and Zhou, Zhuyun and Sun, Guolei and Zamfir, Eduard and Ma, Chao and Paudel, Danda and Van Gool, Luc and Timofte, Radu},
  title     = {{XTrack}: Multimodal Training Boosts {RGB-X} Video Object Trackers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5734--5744},
}
FaceCraft4D: Animated 3D Facial Avatar Generation from a Single Image: Fei Yin,

Mallikarjun B R,

Chun-Han Yao,

Rafal K. Mantiuk,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Fei and R, Mallikarjun B and Yao, Chun-Han and Mantiuk, Rafal K. and Jampani, Varun},
  title     = {{FaceCraft4D}: Animated {3D} Facial Avatar Generation from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11612--11621},
}
GenFlow3D: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences: Hanlin Li,

Wenming Weng,

Yueyi Zhang,

Zhiwei Xiong; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hanlin and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {{GenFlow3D}: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27488--27497},
}
Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!: Zihang Zou,

Boqing Gong,

Liqiang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Zihang and Gong, Boqing and Wang, Liqiang},
  title     = {Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19546--19556},
}
PBCAT: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection: Xiao Li,

Yiming Zhu,

Yifan Huang,

Wei Zhang,

Yingzhe He,

Jie Shi,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiao and Zhu, Yiming and Huang, Yifan and Zhang, Wei and He, Yingzhe and Shi, Jie and Hu, Xiaolin},
  title     = {{PBCAT}: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24456--24466},
}
TRCE: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models: Ruidong Chen,

Honglin Guo,

Lanjun Wang,

Chenyu Zhang,

Weizhi Nie,

An-An Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Ruidong and Guo, Honglin and Wang, Lanjun and Zhang, Chenyu and Nie, Weizhi and Liu, An-An},
  title     = {{TRCE}: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18927--18936},
}
PossLoss: A Reliable and Sensitive Facial Landmark Detection Loss Function: Qikui Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qikui},
  title     = {{PossLoss}: A Reliable and Sensitive Facial Landmark Detection Loss Function},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24858--24867},
}
ChartPoint: Guiding MLLMs with Grounding Reflection for Chart Reasoning: Zhengzhuo Xu,

SiNan Du,

Yiyan Qi,

Siwen Lu,

Chengjin Xu,

Chun Yuan,

Jian Guo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhengzhuo and Du, SiNan and Qi, Yiyan and Lu, Siwen and Xu, Chengjin and Yuan, Chun and Guo, Jian},
  title     = {{ChartPoint}: Guiding {MLLMs} with Grounding Reflection for Chart Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {426--436},
}
StealthAttack: Robust 3D Gaussian Splatting Poisoning via Density-Guided Illusions: Bo-Hsu Ke,

You-Zhe Xie,

Yu-Lun Liu,

Wei-Chen Chiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Bo-Hsu and Xie, You-Zhe and Liu, Yu-Lun and Chiu, Wei-Chen},
  title     = {{StealthAttack}: Robust {3D} {Gaussian} Splatting Poisoning via Density-Guided Illusions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27400--27411},
}
LightCity: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions: Jingjing Wang,

Qirui Hu,

Chong Bao,

Yuke Zhu,

Hujun Bao,

Zhaopeng Cui,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jingjing and Hu, Qirui and Bao, Chong and Zhu, Yuke and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng},
  title     = {{LightCity}: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26477--26487},
}
On the Generalization of Representation Uncertainty in Earth Observation: Spyros Kondylatos,

Nikolaos Ioannis Bountos,

Dimitrios Michail,

Xiao Xiang Zhu,

Gustau Camps-Valls,

Ioannis Papoutsis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kondylatos_2025_ICCV,
  author    = {Kondylatos, Spyros and Bountos, Nikolaos Ioannis and Michail, Dimitrios and Zhu, Xiao Xiang and Camps-Valls, Gustau and Papoutsis, Ioannis},
  title     = {On the Generalization of Representation Uncertainty in {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6552--6562},
}
Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding: Shuyi Ouyang,

Ziwei Niu,

Hongyi Wang,

Yen-Wei Chen,

Lanfen Lin; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Shuyi and Niu, Ziwei and Wang, Hongyi and Chen, Yen-Wei and Lin, Lanfen},
  title     = {Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24192--24202},
}
Mind the Cost of Scaffold! Benign Clients May Even Become Accomplices of Backdoor Attack: Xingshuo Han,

Xuanye Zhang,

Xiang Lan,

Haozhao Wang,

Shengmin Xu,

Shen Ren,

Jason Zeng,

Ming Wu,

Michael Heinrich,

Tianwei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Xingshuo and Zhang, Xuanye and Lan, Xiang and Wang, Haozhao and Xu, Shengmin and Ren, Shen and Zeng, Jason and Wu, Ming and Heinrich, Michael and Zhang, Tianwei},
  title     = {Mind the Cost of Scaffold! Benign Clients May Even Become Accomplices of Backdoor Attack},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1580--1589},
}
MV-Adapter: Multi-View Consistent Image Generation Made Easy: Zehuan Huang,

Yuan-Chen Guo,

Haoran Wang,

Ran Yi,

Lizhuang Ma,

Yan-Pei Cao,

Lu Sheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Zehuan and Guo, Yuan-Chen and Wang, Haoran and Yi, Ran and Ma, Lizhuang and Cao, Yan-Pei and Sheng, Lu},
  title     = {{MV-Adapter}: Multi-View Consistent Image Generation Made Easy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16377--16387},
}
Hybrid-Tower: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval: Bangxiang Lan,

Ruobing Xie,

Ruixiang Zhao,

Xingwu Sun,

Zhanhui Kang,

Gang Yang,

Xirong Li; [pdf] [supp]
[bibtex]
@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Bangxiang and Xie, Ruobing and Zhao, Ruixiang and Sun, Xingwu and Kang, Zhanhui and Yang, Gang and Li, Xirong},
  title     = {{Hybrid-Tower}: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24497--24506},
}
ATLAS: Decoupling Skeletal and Shape Parameters for Expressive Parametric Human Modeling: Jinhyung Park,

Javier Romero,

Shunsuke Saito,

Fabian Prada,

Takaaki Shiratori,

Yichen Xu,

Federica Bogo,

Shoou-I Yu,

Kris Kitani,

Rawal Khirodkar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jinhyung and Romero, Javier and Saito, Shunsuke and Prada, Fabian and Shiratori, Takaaki and Xu, Yichen and Bogo, Federica and Yu, Shoou-I and Kitani, Kris and Khirodkar, Rawal},
  title     = {{ATLAS}: Decoupling Skeletal and Shape Parameters for Expressive Parametric Human Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6508--6518},
}
Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment: Ying Ba,

Tianyu Zhang,

Yalong Bai,

Wenyi Mo,

Tao Liang,

Bing Su,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ba_2025_ICCV,
  author    = {Ba, Ying and Zhang, Tianyu and Bai, Yalong and Mo, Wenyi and Liang, Tao and Su, Bing and Wen, Ji-Rong},
  title     = {Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19022--19031},
}
PHATNet: A Physics-guided Haze Transfer Network for Domain-adaptive Real-world Image Dehazing: Fu-Jen Tsai,

Yan-Tsung Peng,

Yen-Yu Lin,

Chia-Wen Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Fu-Jen and Peng, Yan-Tsung and Lin, Yen-Yu and Lin, Chia-Wen},
  title     = {{PHATNet}: A Physics-guided Haze Transfer Network for Domain-adaptive Real-world Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5591--5600}
}
Diagnosing Pretrained Models for Out-of-distribution Detection: Haipeng Xiong,

Kai Xu,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Xiong_2025_ICCV,
  author    = {Xiong, Haipeng and Xu, Kai and Yao, Angela},
  title     = {Diagnosing Pretrained Models for Out-of-distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1836--1845}
}
LD-RPS: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling: Huaqiu Li,

Yong Wang,

Tongwen Huang,

Hailang Huang,

Haoqian Wang,

Xiangxiang Chu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Huaqiu and Wang, Yong and Huang, Tongwen and Huang, Hailang and Wang, Haoqian and Chu, Xiangxiang},
  title     = {{LD-RPS}: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13684--13694}
}
Outlier-Aware Post-Training Quantization for Image Super-Resolution: Hailing Wang,

Jianglin Lu,

Yitian Zhang,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hailing and Lu, Jianglin and Zhang, Yitian and Fu, Yun},
  title     = {Outlier-Aware Post-Training Quantization for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16175--16184}
}
Equipping Vision Foundation Model with Mixture of Experts for Out-of-Distribution Detection: Shizhen Zhao,

Jiahui Liu,

Xin Wen,

Haoru Tan,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Shizhen and Liu, Jiahui and Wen, Xin and Tan, Haoru and Qi, Xiaojuan},
  title     = {Equipping Vision Foundation Model with Mixture of Experts for Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1751--1761}
}
The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation: Ho Kei Cheng,

Alexander Schwing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Ho Kei and Schwing, Alexander},
  title     = {The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15875--15884}
}
BlinkTrack: Feature Tracking over 80 FPS via Events and Images: Yichen Shen,

Yijin Li,

Shuo Chen,

Guanglin Li,

Zhaoyang Huang,

Hujun Bao,

Zhaopeng Cui,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Yichen and Li, Yijin and Chen, Shuo and Li, Guanglin and Huang, Zhaoyang and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng},
  title     = {{BlinkTrack}: Feature Tracking over 80 {FPS} via Events and Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9298--9308}
}
Less is More: Improving Motion Diffusion Models with Sparse Keyframes: Jinseok Bae,

Inwoo Hwang,

Young-Yoon Lee,

Ziyu Guo,

Joseph Liu,

Yizhak Ben-Shabat,

Young Min Kim,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2025_ICCV,
  author    = {Bae, Jinseok and Hwang, Inwoo and Lee, Young-Yoon and Guo, Ziyu and Liu, Joseph and Ben-Shabat, Yizhak and Kim, Young Min and Kapadia, Mubbasir},
  title     = {Less is More: Improving Motion Diffusion Models with Sparse Keyframes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11069--11078}
}
CAVIS: Context-Aware Video Instance Segmentation: Seunghun Lee,

Jiwan Seo,

Kiljoon Han,

Minwoo Choi,

Sunghoon Im; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunghun and Seo, Jiwan and Han, Kiljoon and Choi, Minwoo and Im, Sunghoon},
  title     = {{CAVIS}: Context-Aware Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4507--4517}
}
LeanVAE: An Ultra-Efficient Reconstruction VAE for Video Diffusion Models: Yu Cheng,

Fajie Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Yu and Yuan, Fajie},
  title     = {{LeanVAE}: An Ultra-Efficient Reconstruction {VAE} for Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15692--15702}
}
Monocular Semantic Scene Completion via Masked Recurrent Networks: Xuzhi Wang,

Xinran Wu,

Song Wang,

Lingdong Kong,

Ziping Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping},
  title     = {Monocular Semantic Scene Completion via Masked Recurrent Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24811--24822}
}
TurboReg: TurboClique for Robust and Efficient Point Cloud Registration: Shaocheng Yan,

Pengcheng Shi,

Zhenjun Zhao,

Kaixin Wang,

Kuang Cao,

Ji Wu,

Jiayuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Shaocheng and Shi, Pengcheng and Zhao, Zhenjun and Wang, Kaixin and Cao, Kuang and Wu, Ji and Li, Jiayuan},
  title     = {{TurboReg}: {TurboClique} for Robust and Efficient Point Cloud Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26371--26381}
}
AR-VRM: Imitating Human Motions for Visual Robot Manipulation with Analogical Reasoning: Dejie Yang,

Zijing Zhao,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Dejie and Zhao, Zijing and Liu, Yang},
  title     = {{AR-VRM}: Imitating Human Motions for Visual Robot Manipulation with Analogical Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6818--6827}
}
Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising: Sébastien Herbreteau,

Michael Unser; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herbreteau_2025_ICCV,
  author    = {Herbreteau, S{\'e}bastien and Unser, Michael},
  title     = {Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10496--10506}
}
HccePose(BF): Predicting Front & Back Surfaces to Construct Ultra-Dense 2D-3D Correspondences for Pose Estimation: Yulin Wang,

Mengting Hu,

Hongli Li,

Chen Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yulin and Hu, Mengting and Li, Hongli and Luo, Chen},
  title     = {{HccePose(BF)}: Predicting Front \& Back Surfaces to Construct Ultra-Dense {2D-3D} Correspondences for Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7166--7175}
}
Find Any Part in 3D: Ziqi Ma,

Yisong Yue,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Ziqi and Yue, Yisong and Gkioxari, Georgia},
  title     = {Find Any Part in {3D}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7818--7827}
}
CompleteMe: Reference-based Human Image Completion: Yu-Ju Tsai,

Brian Price,

Qing Liu,

Luis Figueroa,

Daniil Pakhomov,

Zhihong Ding,

Scott Cohen,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Yu-Ju and Price, Brian and Liu, Qing and Figueroa, Luis and Pakhomov, Daniil and Ding, Zhihong and Cohen, Scott and Yang, Ming-Hsuan},
  title     = {{CompleteMe}: Reference-based Human Image Completion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18252--18261}
}
MMOne: Representing Multiple Modalities in One Scene: Zhifeng Gu,

Bing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_ICCV,
  author    = {Gu, Zhifeng and Wang, Bing},
  title     = {{MMOne}: Representing Multiple Modalities in One Scene},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1088--1098}
}
VA-MoE: Variables-Adaptive Mixture of Experts for Incremental Weather Forecasting: Hao Chen,

Han Tao,

Guo Song,

Jie Zhang,

Yonghan Dong,

Yunlong Yu,

Lei Bai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Hao and Tao, Han and Song, Guo and Zhang, Jie and Dong, Yonghan and Yu, Yunlong and Bai, Lei},
  title     = {{VA-MoE}: Variables-Adaptive Mixture of Experts for Incremental Weather Forecasting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7915--7924}
}
2HandedAfforder: Learning Precise Actionable Bimanual Affordances from Human Videos: Marvin Heidinger,

Snehal Jauhri,

Vignesh Prasad,

Georgia Chalvatzaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heidinger_2025_ICCV,
  author    = {Heidinger, Marvin and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia},
  title     = {{2HandedAfforder}: Learning Precise Actionable Bimanual Affordances from Human Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14743--14753}
}
GenDoP: Auto-regressive Camera Trajectory Generation as a Director of Photography: Mengchen Zhang,

Tong Wu,

Jing Tan,

Ziwei Liu,

Gordon Wetzstein,

Dahua Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Mengchen and Wu, Tong and Tan, Jing and Liu, Ziwei and Wetzstein, Gordon and Lin, Dahua},
  title     = {{GenDoP}: Auto-regressive Camera Trajectory Generation as a Director of Photography},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18229--18239}
}
Latent Swap Joint Diffusion for 2D Long-Form Latent Generation: Yusheng Dai,

Chenxi Wang,

Chang Li,

Chen Wang,

Kewei Li,

Jun Du,

Lei Sun,

Jianqing Gao,

Ruoyu Wang,

Jiefeng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Yusheng and Wang, Chenxi and Li, Chang and Wang, Chen and Li, Kewei and Du, Jun and Sun, Lei and Gao, Jianqing and Wang, Ruoyu and Ma, Jiefeng},
  title     = {Latent Swap Joint Diffusion for {2D} Long-Form Latent Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11006--11015}
}
Neural Inverse Rendering for High-Accuracy 3D Measurement of Moving Objects with Fewer Phase-Shifting Patterns: Yuki Urakawa,

Yoshihiro Watanabe; [pdf] [supp]
[bibtex]
@InProceedings{Urakawa_2025_ICCV,
  author    = {Urakawa, Yuki and Watanabe, Yoshihiro},
  title     = {Neural Inverse Rendering for High-Accuracy {3D} Measurement of Moving Objects with Fewer Phase-Shifting Patterns},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27692--27701}
}
Embodied Navigation with Auxiliary Task of Action Description Prediction: Haru Kondoh,

Asako Kanezaki; [pdf] [supp]
[bibtex]
@InProceedings{Kondoh_2025_ICCV,
  author    = {Kondoh, Haru and Kanezaki, Asako},
  title     = {Embodied Navigation with Auxiliary Task of Action Description Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7025--7036}
}
Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for MLLMs to Conquer the Unknown: Bowen Wang,

Zhouqiang Jiang,

Yasuaki Susumu,

Shotaro Miwa,

Tianwei Chen,

Yuta Nakashima; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bowen and Jiang, Zhouqiang and Susumu, Yasuaki and Miwa, Shotaro and Chen, Tianwei and Nakashima, Yuta},
  title     = {Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for {MLLMs} to Conquer the Unknown},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4732--4742}
}
From Image to Video: An Empirical Study of Diffusion Representations: Pedro Vélez,

Luisa F. Polanía,

Yi Yang,

Chuhan Zhang,

Rishabh Kabra,

Anurag Arnab,

Mehdi S. M. Sajjadi; [pdf] [supp]
[bibtex]
@InProceedings{Velez_2025_ICCV,
  author    = {V{\'e}lez, Pedro and Polan{\'\i}a, Luisa F. and Yang, Yi and Zhang, Chuhan and Kabra, Rishabh and Arnab, Anurag and Sajjadi, Mehdi S. M.},
  title     = {From Image to Video: An Empirical Study of Diffusion Representations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16948--16958}
}
Diffusion Curriculum: Synthetic-to-Real Data Curriculum via Image-Guided Diffusion: Yijun Liang,

Shweta Bhardwaj,

Tianyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yijun and Bhardwaj, Shweta and Zhou, Tianyi},
  title     = {Diffusion Curriculum: Synthetic-to-Real Data Curriculum via Image-Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1697--1707}
}
Dynamic-VLM: Simple Dynamic Visual Token Compression for VideoLLM: Han Wang,

Yuxiang Nie,

Yongjie Ye,

Yanjie Wang,

Shuai Li,

Haiyang Yu,

Jinghui Lu,

Can Huang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Han and Nie, Yuxiang and Ye, Yongjie and Wang, Yanjie and Li, Shuai and Yu, Haiyang and Lu, Jinghui and Huang, Can},
  title     = {{Dynamic-VLM}: Simple Dynamic Visual Token Compression for {VideoLLM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20812--20823}
}
CCL-LGS: Contrastive Codebook Learning for 3D Language Gaussian Splatting: Lei Tian,

Xiaomin Li,

Liqian Ma,

Hao Yin,

Zirui Zheng,

Hefei Huang,

Taiqing Li,

Huchuan Lu,

Xu Jia; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Lei and Li, Xiaomin and Ma, Liqian and Yin, Hao and Zheng, Zirui and Huang, Hefei and Li, Taiqing and Lu, Huchuan and Jia, Xu},
  title     = {{CCL-LGS}: Contrastive Codebook Learning for {3D} Language {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9855--9864}
}
TRACE: Learning 3D Gaussian Physical Dynamics from Multi-view Videos: Jinxi Li,

Ziyang Song,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jinxi and Song, Ziyang and Yang, Bo},
  title     = {{TRACE}: Learning {3D} {Gaussian} Physical Dynamics from Multi-view Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8820--8829}
}
Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation: Rui Sun,

Huayu Mai,

Wangkai Li,

Yujia Chen,

Yuan Wang; [pdf]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Rui and Mai, Huayu and Li, Wangkai and Chen, Yujia and Wang, Yuan},
  title     = {Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20357--20367}
}
FreeCus: Free Lunch Subject-driven Customization in Diffusion Transformers: Yanbing Zhang,

Zhe Wang,

Qin Zhou,

Mengping Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yanbing and Wang, Zhe and Zhou, Qin and Yang, Mengping},
  title     = {{FreeCus}: Free Lunch Subject-driven Customization in Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15521--15531}
}
Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition: Pulkit Kumar,

Shuaiyi Huang,

Matthew Walmer,

Sai Saketh Rambhatla,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Pulkit and Huang, Shuaiyi and Walmer, Matthew and Rambhatla, Sai Saketh and Shrivastava, Abhinav},
  title     = {Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13544--13556}
}
MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space: Lixing Xiao,

Shunlin Lu,

Huaijin Pi,

Ke Fan,

Liang Pan,

Yueer Zhou,

Ziyong Feng,

Xiaowei Zhou,

Sida Peng,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo},
  title     = {{MotionStreamer}: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10086--10096}
}
RS-vHeat: Heat Conduction Guided Efficient Remote Sensing Foundation Model: Huiyang Hu,

Peijin Wang,

Hanbo Bi,

Boyuan Tong,

Zhaozhi Wang,

Wenhui Diao,

Hao Chang,

Yingchao Feng,

Ziqi Zhang,

Yaowei Wang,

Qixiang Ye,

Kun Fu,

Xian Sun; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Huiyang and Wang, Peijin and Bi, Hanbo and Tong, Boyuan and Wang, Zhaozhi and Diao, Wenhui and Chang, Hao and Feng, Yingchao and Zhang, Ziqi and Wang, Yaowei and Ye, Qixiang and Fu, Kun and Sun, Xian},
  title     = {{RS-vHeat}: Heat Conduction Guided Efficient Remote Sensing Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9876--9887}
}
EA-Vit: Efficient Adaptation for Elastic Vision Transformer: Chen Zhu,

Wangbo Zhao,

Huiwen Zhang,

Yuhao Zhou,

Weidong Tang,

Shuo Wang,

Zhihang Yuan,

Yuzhang Shang,

Xiaojiang Peng,

Kai Wang,

Dawei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Chen and Zhao, Wangbo and Zhang, Huiwen and Zhou, Yuhao and Tang, Weidong and Wang, Shuo and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Wang, Kai and Yang, Dawei},
  title     = {{EA-Vit}: Efficient Adaptation for Elastic Vision Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1038--1047}
}
Aether: Geometric-Aware Unified World Modeling: Haoyi Zhu,

Yifan Wang,

Jianjun Zhou,

Wenzheng Chang,

Yang Zhou,

Zizun Li,

Junyi Chen,

Chunhua Shen,

Jiangmiao Pang,

Tong He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haoyi and Wang, Yifan and Zhou, Jianjun and Chang, Wenzheng and Zhou, Yang and Li, Zizun and Chen, Junyi and Shen, Chunhua and Pang, Jiangmiao and He, Tong},
  title     = {Aether: Geometric-Aware Unified World Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8535--8546}
}
ArtEditor: Learning Customized Instructional Image Editor from Few-Shot Examples: Shijie Huang,

Yiren Song,

Yuxuan Zhang,

Hailong Guo,

Xueyin Wang,

Jiaming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Shijie and Song, Yiren and Zhang, Yuxuan and Guo, Hailong and Wang, Xueyin and Liu, Jiaming},
  title     = {{ArtEditor}: Learning Customized Instructional Image Editor from Few-Shot Examples},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17651--17662}
}
LiON-LoRA: Rethinking LoRA Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion: Yisu Zhang,

Chenjie Cao,

Chaohui Yu,

Jianke Zhu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yisu and Cao, Chenjie and Yu, Chaohui and Zhu, Jianke},
  title     = {{LiON-LoRA}: Rethinking {LoRA} Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14569--14579}
}
VLIPP: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior: Xindi Yang,

Baolu Li,

Yiming Zhang,

Zhenfei Yin,

Lei Bai,

Liqian Ma,

Zhiyong Wang,

Jianfei Cai,

Tien-Tsin Wong,

Huchuan Lu,

Xu Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xindi and Li, Baolu and Zhang, Yiming and Yin, Zhenfei and Bai, Lei and Ma, Liqian and Wang, Zhiyong and Cai, Jianfei and Wong, Tien-Tsin and Lu, Huchuan and Jia, Xu},
  title     = {{VLIPP}: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12360--12370}
}
SAM4D: Segment Anything in Camera and LiDAR Streams: Jianyun Xu,

Song Wang,

Ziqian Ni,

Chunyong Hu,

Sheng Yang,

Jianke Zhu,

Qiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jianyun and Wang, Song and Ni, Ziqian and Hu, Chunyong and Yang, Sheng and Zhu, Jianke and Li, Qiang},
  title     = {{SAM4D}: Segment Anything in Camera and {LiDAR} Streams},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28535--28545}
}
DreamRelation: Relation-Centric Video Customization: Yujie Wei,

Shiwei Zhang,

Hangjie Yuan,

Biao Gong,

Longxiang Tang,

Xiang Wang,

Haonan Qiu,

Hengjia Li,

Shuai Tan,

Yingya Zhang,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yujie and Zhang, Shiwei and Yuan, Hangjie and Gong, Biao and Tang, Longxiang and Wang, Xiang and Qiu, Haonan and Li, Hengjia and Tan, Shuai and Zhang, Yingya and Shan, Hongming},
  title     = {{DreamRelation}: Relation-Centric Video Customization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12381--12393}
}
DropletVideo: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation: Runze Zhang,

Guoguang Du,

Xiaochuan Li,

Qi Jia,

Liang Jin,

Lu Liu,

Jingjing Wang,

Cong Xu,

Zhenhua Guo,

Yaqian Zhao,

Xiaoli Gong,

Rengang Li,

Baoyu Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Runze and Du, Guoguang and Li, Xiaochuan and Jia, Qi and Jin, Liang and Liu, Lu and Wang, Jingjing and Xu, Cong and Guo, Zhenhua and Zhao, Yaqian and Gong, Xiaoli and Li, Rengang and Fan, Baoyu},
  title     = {{DropletVideo}: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15583--15593}
}
TAB: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models: Pooyan Rahmanzadehgervi,

Hung Huy Nguyen,

Rosanne Liu,

Long Mai,

Anh Totti Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahmanzadehgervi_2025_ICCV,
  author    = {Rahmanzadehgervi, Pooyan and Nguyen, Hung Huy and Liu, Rosanne and Mai, Long and Nguyen, Anh Totti},
  title     = {{TAB}: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22551--22562}
}
Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints: Guanjie Chen,

Xinyu Zhao,

Yucheng Zhou,

Xiaoye Qu,

Tianlong Chen,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Guanjie and Zhao, Xinyu and Zhou, Yucheng and Qu, Xiaoye and Chen, Tianlong and Cheng, Yu},
  title     = {Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17708--17718}
}
Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models: Chang Qiu,

Feipeng Da,

Zilei Zhang; [pdf]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Chang and Da, Feipeng and Zhang, Zilei},
  title     = {Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26559--26568}
}
FLOAT: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait: Taekyung Ki,

Dongchan Min,

Gyeongsu Chae; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ki_2025_ICCV,
  author    = {Ki, Taekyung and Min, Dongchan and Chae, Gyeongsu},
  title     = {{FLOAT}: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14699--14710}
}
Membership Inference Attacks with False Discovery Rate Control: Chenxu Zhao,

Wei Qian,

Aobo Chen,

Mengdi Huai; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Chenxu and Qian, Wei and Chen, Aobo and Huai, Mengdi},
  title     = {Membership Inference Attacks with False Discovery Rate Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1216--1227}
}
EmbodiedSplat: Personalized Real-to-Sim-to-Real Navigation with Gaussian Splats from a Mobile Device: Gunjan Chhablani,

Xiaomeng Ye,

Muhammad Zubair Irshad,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chhablani_2025_ICCV,
  author    = {Chhablani, Gunjan and Ye, Xiaomeng and Irshad, Muhammad Zubair and Kira, Zsolt},
  title     = {{EmbodiedSplat}: Personalized Real-to-Sim-to-Real Navigation with {Gaussian} Splats from a Mobile Device},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25431--25441}
}
Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection: Anja Delić,

Matej Grcic,

Siniša Šegvić; [pdf] [supp]
[bibtex]
@InProceedings{Delic_2025_ICCV,
  author    = {Deli{\'c}, Anja and Grcic, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a},
  title     = {Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11579--11589}
}
SynCity: Training-Free Generation of 3D Worlds: Paul Engstler,

Aleksandar Shtedritski,

Iro Laina,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Engstler_2025_ICCV,
  author    = {Engstler, Paul and Shtedritski, Aleksandar and Laina, Iro and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{SynCity}: Training-Free Generation of {3D} Worlds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27585--27595}
}
Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations: Conghao Wong,

Ziqian Zou,

Beihao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Conghao and Zou, Ziqian and Xia, Beihao},
  title     = {Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25788--25799}
}
Adversarial Training for Probabilistic Robustness: Yi Zhang,

Yuhang Chen,

Zhen Chen,

Wenjie Ruan,

Xiaowei Huang,

Siddartha Khastgir,

Xingyu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yi and Chen, Yuhang and Chen, Zhen and Ruan, Wenjie and Huang, Xiaowei and Khastgir, Siddartha and Zhao, Xingyu},
  title     = {Adversarial Training for Probabilistic Robustness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1675--1685}
}
MUNBa: Machine Unlearning via Nash Bargaining: Jing Wu,

Mehrtash Harandi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jing and Harandi, Mehrtash},
  title     = {{MUNBa}: Machine Unlearning via {Nash} Bargaining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4754--4765}
}
DLF: Extreme Image Compression with Dual-generative Latent Fusion: Naifu Xue,

Zhaoyang Jia,

Jiahao Li,

Bin Li,

Yuan Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zhang, Yuan and Lu, Yan},
  title     = {{DLF}: Extreme Image Compression with Dual-generative Latent Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19227--19236}
}
PanoLlama: Generating Endless and Coherent Panoramas with Next-Token-Prediction LLMs: Teng Zhou,

Xiaoyu Zhang,

Yongchuan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Teng and Zhang, Xiaoyu and Tang, Yongchuan},
  title     = {{PanoLlama}: Generating Endless and Coherent Panoramas with Next-Token-Prediction {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15340--15349}
}
Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction: Chamin Hewa Koneputugodage,

Dylan Campbell,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Koneputugodage_2025_ICCV,
  author    = {Koneputugodage, Chamin Hewa and Campbell, Dylan and Gould, Stephen},
  title     = {Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26116--26125}
}
VQ-SGen: A Vector Quantized Stroke Representation for Creative Sketch Generation: Jiawei Wang,

Zhiming Cui,

Changjian Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiawei and Cui, Zhiming and Li, Changjian},
  title     = {{VQ-SGen}: A Vector Quantized Stroke Representation for Creative Sketch Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19311--19320},
}
Cross-Category Subjectivity Generalization for Style-Adaptive Sketch Re-ID: Zechao Hu,

Zhengwei Yang,

Hao Li,

Zheng Wang,

Yixiong Zou; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Zechao and Yang, Zhengwei and Li, Hao and Wang, Zheng and Zou, Yixiong},
  title     = {Cross-Category Subjectivity Generalization for Style-Adaptive Sketch {Re-ID}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22644--22653},
}
VolumetricSMPL: A Neural Volumetric Body Model for Efficient Interactions, Contacts, and Collisions: Marko Mihajlovic,

Siwei Zhang,

Gen Li,

Kaifeng Zhao,

Lea Muller,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mihajlovic_2025_ICCV,
  author    = {Mihajlovic, Marko and Zhang, Siwei and Li, Gen and Zhao, Kaifeng and Muller, Lea and Tang, Siyu},
  title     = {{VolumetricSMPL}: A Neural Volumetric Body Model for Efficient Interactions, Contacts, and Collisions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5060--5070},
}
Spatial-Temporal Forgery Trace based Forgery Image Identification: Yilin Wang,

Zunlei Feng,

Jiachi Wang,

Hengrui Lou,

Binjia Zhou,

Jie Lei,

Mingli Song,

Yijun Bei; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yilin and Feng, Zunlei and Wang, Jiachi and Lou, Hengrui and Zhou, Binjia and Lei, Jie and Song, Mingli and Bei, Yijun},
  title     = {Spatial-Temporal Forgery Trace based Forgery Image Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17067--17076},
}
Passing the Driving Knowledge Test: Maolin Wei,

Wanzhou Liu,

Eshed Ohn-Bar; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Maolin and Liu, Wanzhou and Ohn-Bar, Eshed},
  title     = {Passing the Driving Knowledge Test},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8395--8406},
}
OneGT: One-Shot Geometry-Texture Neural Rendering for Head Avatars: Jinshu Chen,

Bingchuan Li,

Fan Zhang,

Songtao Zhao,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jinshu and Li, Bingchuan and Zhang, Fan and Zhao, Songtao and He, Qian},
  title     = {{OneGT}: One-Shot Geometry-Texture Neural Rendering for Head Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11294--11304},
}
Joint Asymmetric Loss for Learning with Noisy Labels: Jialiang Wang,

Xianming Liu,

Xiong Zhou,

Gangfeng Hu,

Deming Zhai,

Junjun Jiang,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jialiang and Liu, Xianming and Zhou, Xiong and Hu, Gangfeng and Zhai, Deming and Jiang, Junjun and Ji, Xiangyang},
  title     = {Joint Asymmetric Loss for Learning with Noisy Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1947--1956},
}
A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness: Xiaoyi Feng,

Tao Huang,

Peng Wang,

Zizhou Huang,

Zhang Haihang,

Yuntao Zou,

Dagang Li,

Kaifeng Zou; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Xiaoyi and Huang, Tao and Wang, Peng and Huang, Zizhou and Haihang, Zhang and Zou, Yuntao and Li, Dagang and Zou, Kaifeng},
  title     = {A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19301--19310},
}
Uncalibrated Structure from Motion on a Sphere: Jonathan Ventura,

Viktor Larsson,

Fredrik Kahl; [pdf] [supp]
[bibtex]
@InProceedings{Ventura_2025_ICCV,
  author    = {Ventura, Jonathan and Larsson, Viktor and Kahl, Fredrik},
  title     = {Uncalibrated Structure from Motion on a Sphere},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {69--78},
}
CARL: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor: Han Ji,

Yuqi Feng,

Jiahao Fan,

Yanan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan},
  title     = {{CARL}: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23019--23029},
}
An OpenMind for 3D Medical Vision Self-supervised Learning: Tassilo Wald,

Constantin Ulrich,

Jonathan Suprijadi,

Sebastian Ziegler,

Michal Nohel,

Robin Peretzke,

Gregor Kohler,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wald_2025_ICCV,
  author    = {Wald, Tassilo and Ulrich, Constantin and Suprijadi, Jonathan and Ziegler, Sebastian and Nohel, Michal and Peretzke, Robin and Kohler, Gregor and Maier-Hein, Klaus},
  title     = {An {OpenMind} for {3D} Medical Vision Self-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23839--23879},
}
Online Dense Point Tracking with Streaming Memory: Qiaole Dong,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Qiaole and Fu, Yanwei},
  title     = {Online Dense Point Tracking with Streaming Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8710--8720},
}
MCID: Multi-aspect Copyright Infringement Detection for Generated Images: Chuanwei Huang,

Zexi Jia,

Hongyan Fei,

Yeshuang Zhu,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Xiaoyue Duan,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Chuanwei and Jia, Zexi and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Duan, Xiaoyue and Zhang, Jinchao and Zhou, Jie},
  title     = {{MCID}: Multi-aspect Copyright Infringement Detection for Generated Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16154--16164},
}
Any2AnyTryon: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks: Hailong Guo,

Bohan Zeng,

Yiren Song,

Wentao Zhang,

Jiaming Liu,

Chuang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Hailong and Zeng, Bohan and Song, Yiren and Zhang, Wentao and Liu, Jiaming and Zhang, Chuang},
  title     = {{Any2AnyTryon}: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19085--19096},
}
STEP-DETR: Advancing DETR-based Semi-Supervised Object Detection with Super Teacher and Pseudo-Label Guided Text Queries: Tahira Shehzadi,

Khurram Azeem Hashmi,

Shalini Sarode,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
@InProceedings{Shehzadi_2025_ICCV,
  author    = {Shehzadi, Tahira and Hashmi, Khurram Azeem and Sarode, Shalini and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{STEP-DETR}: Advancing {DETR}-based Semi-Supervised Object Detection with Super Teacher and Pseudo-Label Guided Text Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3069--3079},
}
DISTA-Net: Dynamic Closely-Spaced Infrared Small Target Unmixing: Shengdong Han,

Shangdong Yang,

Yuxuan Li,

Xin Zhang,

Xiang Li,

Jian Yang,

Ming-Ming Cheng,

Yimian Dai; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Shengdong and Yang, Shangdong and Li, Yuxuan and Zhang, Xin and Li, Xiang and Yang, Jian and Cheng, Ming-Ming and Dai, Yimian},
  title     = {{DISTA-Net}: Dynamic Closely-Spaced Infrared Small Target Unmixing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14655--14664},
}
MDD: A Dataset for Text-and-Music Conditioned Duet Dance Generation: Prerit Gupta,

Jason Alexander Fotso-Puepi,

Zhengyuan Li,

Jay Mehta,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Prerit and Fotso-Puepi, Jason Alexander and Li, Zhengyuan and Mehta, Jay and Bera, Aniket},
  title     = {{MDD}: A Dataset for Text-and-Music Conditioned Duet Dance Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13932--13941},
}
Met2Net: A Decoupled Two-Stage Spatio-Temporal Forecasting Model for Complex Meteorological Systems: Shaohan Li,

Hao Yang,

Min Chen,

Xiaolin Qin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shaohan and Yang, Hao and Chen, Min and Qin, Xiaolin},
  title     = {{Met2Net}: A Decoupled Two-Stage Spatio-Temporal Forecasting Model for Complex Meteorological Systems},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5458--5468},
}
SVG-Head: Hybrid Surface-Volumetric Gaussians for High-Fidelity Head Reconstruction and Real-Time Editing: Heyi Sun,

Cong Wang,

Tian-Xing Xu,

Jingwei Huang,

Di Kang,

Chunchao Guo,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Heyi and Wang, Cong and Xu, Tian-Xing and Huang, Jingwei and Kang, Di and Guo, Chunchao and Zhang, Song-Hai},
  title     = {{SVG-Head}: Hybrid Surface-Volumetric {Gaussians} for High-Fidelity Head Reconstruction and Real-Time Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13326--13335},
}
Dynamic Dictionary Learning for Remote Sensing Image Segmentation: Xuechao Zou,

Yue Li,

Shun Zhang,

Kai Li,

Shiying Wang,

Pin Tao,

Junliang Xing,

Congyan Lang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Xuechao and Li, Yue and Zhang, Shun and Li, Kai and Wang, Shiying and Tao, Pin and Xing, Junliang and Lang, Congyan},
  title     = {Dynamic Dictionary Learning for Remote Sensing Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22457--22466},
}
Video Motion Graphs: Haiyang Liu,

Zhan Xu,

Fa-Ting Hong,

Hsin-Ping Huang,

Yi Zhou,

Yang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Haiyang and Xu, Zhan and Hong, Fa-Ting and Huang, Hsin-Ping and Zhou, Yi and Zhou, Yang},
  title     = {Video Motion Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13730--13740},
}
Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering: Imad Eddine Marouf,

Enzo Tartaglione,

Stéphane Lathuilière,

Joost Van De Weijer; [pdf] [supp]
[bibtex]
@InProceedings{Marouf_2025_ICCV,
  author    = {Marouf, Imad Eddine and Tartaglione, Enzo and Lathuili{\`e}re, St{\'e}phane and Van De Weijer, Joost},
  title     = {Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18078--18089},
}
Dense Policy: Bidirectional Autoregressive Learning of Actions: Yue Su,

Xinyu Zhan,

Hongjie Fang,

Han Xue,

Hao-Shu Fang,

Yong-Lu Li,

Cewu Lu,

Lixin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Yue and Zhan, Xinyu and Fang, Hongjie and Xue, Han and Fang, Hao-Shu and Li, Yong-Lu and Lu, Cewu and Yang, Lixin},
  title     = {Dense Policy: Bidirectional Autoregressive Learning of Actions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14486--14495},
}
Conditional Visual Autoregressive Modeling for Pathological Image Restoration: Ziyi Liu,

Zhe Xu,

Jiabo Ma,

Wenqiang Li,

Ruixuan Wang,

Bo Du,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ziyi and Xu, Zhe and Ma, Jiabo and Li, Wenqiang and Wang, Ruixuan and Du, Bo and Chen, Hao},
  title     = {Conditional Visual Autoregressive Modeling for Pathological Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17828--17837},
}
BlueNeg: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration: Hanyuan Liu,

Chengze Li,

Minshan Xie,

Zhenni Wang,

Jiawen Liang,

Chi-Sing Leung,

Tien-Tsin Wong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Hanyuan and Li, Chengze and Xie, Minshan and Wang, Zhenni and Liang, Jiawen and Leung, Chi-Sing and Wong, Tien-Tsin},
  title     = {{BlueNeg}: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13119--13128},
}
egoPPG: Heart Rate Estimation from Eye-Tracking Cameras in Egocentric Systems to Benefit Downstream Vision Tasks: Björn Braun,

Rayan Armani,

Manuel Meier,

Max Moebus,

Christian Holz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Braun_2025_ICCV,
  author    = {Braun, Bj{\"o}rn and Armani, Rayan and Meier, Manuel and Moebus, Max and Holz, Christian},
  title     = {{egoPPG}: Heart Rate Estimation from Eye-Tracking Cameras in Egocentric Systems to Benefit Downstream Vision Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5579--5590},
}
Beyond Pixel Uncertainty: Bounding the OoD Objects in Road Scenes: Huachao Zhu,

Zelong Liu,

Zhichao Sun,

Yuda Zou,

Gui-Song Xia,

Yongchao Xu; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Huachao and Liu, Zelong and Sun, Zhichao and Zou, Yuda and Xia, Gui-Song and Xu, Yongchao},
  title     = {Beyond Pixel Uncertainty: Bounding the {OoD} Objects in Road Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8472--8481},
}
Gain-MLP: Improving HDR Gain Map Encoding via a Lightweight MLP: Trevor D. Canham,

SaiKiran Tedla,

Michael J. Murdoch,

Michael S. Brown; [pdf] [supp]
[bibtex]
@InProceedings{Canham_2025_ICCV,
  author    = {Canham, Trevor D. and Tedla, SaiKiran and Murdoch, Michael J. and Brown, Michael S.},
  title     = {{Gain-MLP}: Improving {HDR} Gain Map Encoding via a Lightweight {MLP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18619--18628},
}
MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion: Zebin He,

Mingxin Yang,

Shuhui Yang,

Yixuan Tang,

Tao Wang,

Kaihao Zhang,

Guanying Chen,

Yuhong Liu,

Jie Jiang,

Chunchao Guo,

Wenhan Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan},
  title     = {{MaterialMVP}: Illumination-Invariant Material Generation via Multi-view {PBR} Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26294--26305},
}
Learning to See in the Extremely Dark: Hai Jiang,

Binhao Guan,

Zhen Liu,

Xiaohong Liu,

Jian Yu,

Zheng Liu,

Songchen Han,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Hai and Guan, Binhao and Liu, Zhen and Liu, Xiaohong and Yu, Jian and Liu, Zheng and Han, Songchen and Liu, Shuaicheng},
  title     = {Learning to See in the Extremely Dark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7676--7685},
}
StyleMotif: Multi-Modal Motion Stylization using Style-Content Cross Fusion: Ziyu Guo,

Young Yoon Lee,

Joseph Liu,

Yizhak Ben-Shabat,

Victor Zordan,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ziyu and Lee, Young Yoon and Liu, Joseph and Ben-Shabat, Yizhak and Zordan, Victor and Kapadia, Mubbasir},
  title     = {{StyleMotif}: Multi-Modal Motion Stylization using Style-Content Cross Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13349--13359},
}
SpecGuard: Spectral Projection-based Advanced Invisible Watermarking: Inzamamul Alam,

Md Tanvir Islam,

Simon S. Woo,

Khan Muhammad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alam_2025_ICCV,
  author    = {Alam, Inzamamul and Islam, Md Tanvir and Woo, Simon S. and Muhammad, Khan},
  title     = {{SpecGuard}: Spectral Projection-based Advanced Invisible Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17984--17993},
}
Scaling Laws for Native Multimodal Models: Mustafa Shukor,

Enrico Fini,

Victor Guilherme Turrisi da Costa,

Matthieu Cord,

Joshua Susskind,

Alaaeldin El-Nouby; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shukor_2025_ICCV,
  author    = {Shukor, Mustafa and Fini, Enrico and da Costa, Victor Guilherme Turrisi and Cord, Matthieu and Susskind, Joshua and El-Nouby, Alaaeldin},
  title     = {Scaling Laws for Native Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12--23},
}
Bridging Diffusion Models and 3D Representations: A 3D Consistent Super-Resolution Framework: Yi-Ting Chen,

Ting-Hsuan Liao,

Pengsheng Guo,

Alexander Schwing,

Jia-Bin Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yi-Ting and Liao, Ting-Hsuan and Guo, Pengsheng and Schwing, Alexander and Huang, Jia-Bin},
  title     = {Bridging Diffusion Models and {3D} Representations: A {3D} Consistent Super-Resolution Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13481--13490},
}
MagicID: Hybrid Preference Optimization for ID-Consistent and Dynamic-Preserved Video Customization: Hengjia Li,

Lifan Jiang,

Xi Xiao,

Tianyang Wang,

Hongwei Yi,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hengjia and Jiang, Lifan and Xiao, Xi and Wang, Tianyang and Yi, Hongwei and Wu, Boxi and Cai, Deng},
  title     = {{MagicID}: Hybrid Preference Optimization for {ID}-Consistent and Dynamic-Preserved Video Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12737--12746},
}
LookOut: Real-World Humanoid Egocentric Navigation: Boxiao Pan,

Adam W. Harley,

Francis Engelmann,

C. Karen Liu,

Leonidas J. Guibas; [pdf]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Boxiao and Harley, Adam W. and Engelmann, Francis and Liu, C. Karen and Guibas, Leonidas J.},
  title     = {{LookOut}: Real-World Humanoid Egocentric Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24977--24988},
}
Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images: Simon Niedermayr,

Christoph Neuhauser,

Rüdiger Westermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niedermayr_2025_ICCV,
  author    = {Niedermayr, Simon and Neuhauser, Christoph and Westermann, R{\"u}diger},
  title     = {Lightweight Gradient-Aware Upscaling of {3D} {Gaussian} Splatting Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25862--25871},
}
Motion-2-to-3: Leveraging 2D Motion Data for 3D Motion Generations: Ruoxi Guo,

Huaijin Pi,

Zehong Shen,

Qing Shuai,

Zechen Hu,

Zhumei Wang,

Yajiao Dong,

Ruizhen Hu,

Taku Komura,

Sida Peng,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ruoxi and Pi, Huaijin and Shen, Zehong and Shuai, Qing and Hu, Zechen and Wang, Zhumei and Dong, Yajiao and Hu, Ruizhen and Komura, Taku and Peng, Sida and Zhou, Xiaowei},
  title     = {Motion-2-to-3: Leveraging {2D} Motion Data for {3D} Motion Generations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14305--14316},
}
Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation: Tianyu Zou,

Shengwu Xiong,

Ruilin Yao,

Yi Rong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Tianyu and Xiong, Shengwu and Yao, Ruilin and Rong, Yi},
  title     = {Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20561--20571},
}
Large Scene Generation with Cube-Absorb Discrete Diffusion: Qianjiang Hu,

Wei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Qianjiang and Hu, Wei},
  title     = {Large Scene Generation with Cube-Absorb Discrete Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25186--25196},
}
Advancing Text-to-3D Generation with Linearized Lookahead Variational Score Distillation: Yu Lei,

Bingde Liu,

Qingsong Xie,

Haonan Lu,

Zhijie Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Yu and Liu, Bingde and Xie, Qingsong and Lu, Haonan and Deng, Zhijie},
  title     = {Advancing Text-to-{3D} Generation with Linearized Lookahead Variational Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19567--19576},
}
MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling: Guan Luo,

Jianfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Guan and Zhang, Jianfeng},
  title     = {{MS3D}: High-Quality {3D} Generation via Multi-Scale Representation Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26336--26348},
}
Memory-Efficient 4-bit Preconditioned Stochastic Optimization: Jingyang Li,

Kuangyu Ding,

Kim-Chuan Toh,

Pan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jingyang and Ding, Kuangyu and Toh, Kim-Chuan and Zhou, Pan},
  title     = {Memory-Efficient 4-bit Preconditioned Stochastic Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22633--22643},
}
On the Recovery of Cameras from Fundamental Matrices: Rakshith Madhavan,

Federica Arrigoni; [pdf] [supp]
[bibtex]
@InProceedings{Madhavan_2025_ICCV,
  author    = {Madhavan, Rakshith and Arrigoni, Federica},
  title     = {On the Recovery of Cameras from Fundamental Matrices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20934--20943},
}
Not All Frame Features Are Equal: Video-to-4D Generation via Decoupling Dynamic-Static Features: Liying Yang,

Chen Liu,

Zhenwei Zhu,

Ajian Liu,

Hui Ma,

Jian Nong,

Yanyan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Liying and Liu, Chen and Zhu, Zhenwei and Liu, Ajian and Ma, Hui and Nong, Jian and Liang, Yanyan},
  title     = {Not All Frame Features Are Equal: Video-to-{4D} Generation via Decoupling Dynamic-Static Features},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7494--7504},
}
FedDifRC: Unlocking the Potential of Text-to-Image Diffusion Models in Heterogeneous Federated Learning: Huan Wang,

Haoran Li,

Huaming Chen,

Jun Yan,

Jiahua Shi,

Jun Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Huan and Li, Haoran and Chen, Huaming and Yan, Jun and Shi, Jiahua and Shen, Jun},
  title     = {{FedDifRC}: Unlocking the Potential of Text-to-Image Diffusion Models in Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3726--3736},
}
Vivid4D: Improving 4D Reconstruction from Monocular Video by Video Inpainting: Jiaxin Huang,

Sheng Miao,

Bangbang Yang,

Yuewen Ma,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Jiaxin and Miao, Sheng and Yang, Bangbang and Ma, Yuewen and Liao, Yiyi},
  title     = {{Vivid4D}: Improving {4D} Reconstruction from Monocular Video by Video Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12592--12604},
}
Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration: Mark Endo,

Xiaohan Wang,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Endo_2025_ICCV,
  author    = {Endo, Mark and Wang, Xiaohan and Yeung-Levy, Serena},
  title     = {Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22826--22835},
}
RePoseD: Efficient Relative Pose Estimation With Known Depth Information: Yaqing Ding,

Viktor Kocur,

Václav Vávra,

Zuzana Berger Haladová,

Jian Yang,

Torsten Sattler,

Zuzana Kukelova; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Yaqing and Kocur, Viktor and V{\'a}vra, V{\'a}clav and Haladov{\'a}, Zuzana Berger and Yang, Jian and Sattler, Torsten and Kukelova, Zuzana},
  title     = {{RePoseD}: Efficient Relative Pose Estimation With Known Depth Information},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14876--14886},
}
MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes: Xinjie Zhang,

Zhening Liu,

Yifan Zhang,

Xingtong Ge,

Dailan He,

Tongda Xu,

Yan Wang,

Zehong Lin,

Shuicheng Yan,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xinjie and Liu, Zhening and Zhang, Yifan and Ge, Xingtong and He, Dailan and Xu, Tongda and Wang, Yan and Lin, Zehong and Yan, Shuicheng and Zhang, Jun},
  title     = {{MEGA}: Memory-Efficient {4D} {Gaussian} Splatting for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27828--27838},
}
Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition: Zefeng Qian,

Xincheng Yao,

Yifei Huang,

Chongyang Zhang,

Jiangyong Ying,

Hong Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zefeng and Yao, Xincheng and Huang, Yifei and Zhang, Chongyang and Ying, Jiangyong and Sun, Hong},
  title     = {Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10421--10431},
}
VoluMe - Authentic 3D Video Calls from Live Gaussian Splat Prediction: Martin de La Gorce,

Charlie Hewitt,

Tibor Takács,

Robert Gerdisch,

Zafiirah Hosenie,

Givi Meishvili,

Marek Kowalski,

Thomas J. Cashman,

Antonio Criminisi; [pdf] [supp]
[bibtex]
@InProceedings{de_La_Gorce_2025_ICCV,
  author    = {de La Gorce, Martin and Hewitt, Charlie and Tak{\'a}cs, Tibor and Gerdisch, Robert and Hosenie, Zafiirah and Meishvili, Givi and Kowalski, Marek and Cashman, Thomas J. and Criminisi, Antonio},
  title     = {{VoluMe} - Authentic {3D} Video Calls from Live {Gaussian} Splat Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13783--13792},
}
Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation: Bozhong Zheng,

Jinye Gan,

Xiaohao Xu,

Xintao Chen,

Wenqiao Li,

Xiaonan Huang,

Na Ni,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Bozhong and Gan, Jinye and Xu, Xiaohao and Chen, Xintao and Li, Wenqiao and Huang, Xiaonan and Ni, Na and Wu, Yingna},
  title     = {Bridging {3D} Anomaly Localization and Repair via High-Quality Continuous Geometric Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27063--27072},
}
MonoMobility: Zero-Shot 3D Mobility Analysis from Monocular Videos: Hongyi Zhou,

Yulan Guo,

Xiaogang Wang,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hongyi and Guo, Yulan and Wang, Xiaogang and Xu, Kai},
  title     = {{MonoMobility}: Zero-Shot {3D} Mobility Analysis from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8800--8809},
}
Prior-aware Dynamic Temporal Modeling Framework for Sequential 3D Hand Pose Estimation: Pengfei Ren,

Jingyu Wang,

Haifeng Sun,

Qi Qi,

Xingyu Liu,

Menghao Zhang,

Lei Zhang,

Jing Wang,

Jianxin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Liu, Xingyu and Zhang, Menghao and Zhang, Lei and Wang, Jing and Liao, Jianxin},
  title     = {Prior-aware Dynamic Temporal Modeling Framework for Sequential {3D} Hand Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6476--6487},
}
Generate, Refine, and Encode: Leveraging Synthesized Novel Samples for On-the-Fly Fine-Grained Category Discovery: Xiao Liu,

Nan Pu,

Haiyang Zheng,

Wenjing Li,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiao and Pu, Nan and Zheng, Haiyang and Li, Wenjing and Sebe, Nicu and Zhong, Zhun},
  title     = {Generate, Refine, and Encode: Leveraging Synthesized Novel Samples for On-the-Fly Fine-Grained Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1078--1087},
}
Visual-RFT: Visual Reinforcement Fine-Tuning: Ziyu Liu,

Zeyi Sun,

Yuhang Zang,

Xiaoyi Dong,

Yuhang Cao,

Haodong Duan,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ziyu and Sun, Zeyi and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
  title     = {{Visual-RFT}: Visual Reinforcement Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2034--2044},
}
Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics: Muleilan Pei,

Shaoshuai Shi,

Xuesong Chen,

Xu Liu,

Shaojie Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_ICCV,
  author    = {Pei, Muleilan and Shi, Shaoshuai and Chen, Xuesong and Liu, Xu and Shen, Shaojie},
  title     = {Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28303--28312},
}
Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating: Lilika Makabe,

Hiroaki Santo,

Fumio Okura,

Michael S. Brown,

Yasuyuki Matsushita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Makabe_2025_ICCV,
  author    = {Makabe, Lilika and Santo, Hiroaki and Okura, Fumio and Brown, Michael S. and Matsushita, Yasuyuki},
  title     = {Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27252--27261},
}
DISTIL: Data-Free Inversion of Suspicious Trojan Inputs via Latent Diffusion: Hossein Mirzaei,

Zeinab Taghavi,

Sepehr Rezaee,

Masoud Hadi,

Moein Madadi,

Mackenzie W. Mathis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mirzaei_2025_ICCV,
  author    = {Mirzaei, Hossein and Taghavi, Zeinab and Rezaee, Sepehr and Hadi, Masoud and Madadi, Moein and Mathis, Mackenzie W.},
  title     = {{DISTIL}: Data-Free Inversion of Suspicious {Trojan} Inputs via Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3194--3205},
}
Moment Quantization for Video Temporal Grounding: Xiaolong Sun,

Le Wang,

Sanping Zhou,

Liushuai Shi,

Kun Xia,

Mengnan Liu,

Yabing Wang,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xiaolong and Wang, Le and Zhou, Sanping and Shi, Liushuai and Xia, Kun and Liu, Mengnan and Wang, Yabing and Hua, Gang},
  title     = {Moment Quantization for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20137--20146},
}
ADIEE: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation: Sherry X. Chen,

Yi Wei,

Luowei Zhou,

Suren Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Sherry X. and Wei, Yi and Zhou, Luowei and Kumar, Suren},
  title     = {{ADIEE}: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18345--18356},
}
Dynamic Reconstruction of Hand-Object Interaction with Distributed Force-aware Contact Representation: Zhenjun Yu,

Wenqiang Xu,

Pengfei Xie,

Yutong Li,

Brian W. Anthony,

Zhuorui Zhang,

Cewu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Zhenjun and Xu, Wenqiang and Xie, Pengfei and Li, Yutong and Anthony, Brian W. and Zhang, Zhuorui and Lu, Cewu},
  title     = {Dynamic Reconstruction of Hand-Object Interaction with Distributed Force-aware Contact Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8590--8599},
}
CSD-VAR: Content-Style Decomposition in Visual Autoregressive Models: Quang-Binh Nguyen,

Minh Luu,

Quang Nguyen,

Anh Tran,

Khoi Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Quang-Binh and Luu, Minh and Nguyen, Quang and Tran, Anh and Nguyen, Khoi},
  title     = {{CSD-VAR}: Content-Style Decomposition in Visual Autoregressive Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17013--17023},
}
MSQ: Memory-Efficient Bit Sparsification Quantization: Seokho Han,

Seoyeon Yoon,

Jinhee Kim,

Dongwei Wang,

Kang Eun Jeon,

Huanrui Yang,

Jong Hwan Ko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Seokho and Yoon, Seoyeon and Kim, Jinhee and Wang, Dongwei and Jeon, Kang Eun and Yang, Huanrui and Ko, Jong Hwan},
  title     = {{MSQ}: Memory-Efficient Bit Sparsification Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21885--21894},
}
Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy: Wei Junhao,

Yu Zhe,

Jun Sakuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Junhao_2025_ICCV,
  author    = {Junhao, Wei and Zhe, Yu and Sakuma, Jun},
  title     = {Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17698--17707},
}
VSSD: Vision Mamba with Non-Causal State Space Duality: Yuheng Shi,

Mingjia Li,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yuheng and Li, Mingjia and Dong, Minjing and Xu, Chang},
  title     = {{VSSD}: Vision {Mamba} with Non-Causal State Space Duality},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10819--10829},
}
Epipolar Consistent Attention Aggregation Network for Unsupervised Light Field Disparity Estimation: Chen Gao,

Shuo Zhang,

Youfang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Chen and Zhang, Shuo and Lin, Youfang},
  title     = {Epipolar Consistent Attention Aggregation Network for Unsupervised Light Field Disparity Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6488--6497},
}
LLaVA-KD: A Framework of Distilling Multimodal Large Language Models: Yuxuan Cai,

Jiangning Zhang,

Haoyang He,

Xinwei He,

Ao Tong,

Zhenye Gan,

Chengjie Wang,

Zhucun Xue,

Yong Liu,

Xiang Bai; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yuxuan and Zhang, Jiangning and He, Haoyang and He, Xinwei and Tong, Ao and Gan, Zhenye and Wang, Chengjie and Xue, Zhucun and Liu, Yong and Bai, Xiang},
  title     = {{LLaVA-KD}: A Framework of Distilling Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {239--249},
}
CLIP-GS: Unifying Vision-Language Representation with 3D Gaussian Splatting: Siyu Jiao,

Haoye Dong,

Yuyang Yin,

Zequn Jie,

Yinlong Qian,

Yao Zhao,

Humphrey Shi,

Yunchao Wei; [pdf] [supp]
[bibtex]
@InProceedings{Jiao_2025_ICCV,
  author    = {Jiao, Siyu and Dong, Haoye and Yin, Yuyang and Jie, Zequn and Qian, Yinlong and Zhao, Yao and Shi, Humphrey and Wei, Yunchao},
  title     = {{CLIP-GS}: Unifying Vision-Language Representation with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4670--4680},
}
Region-Level Data Attribution for Text-to-Image Generative Models: Trong Bang Nguyen,

Phi Le Nguyen,

Simon Lucey,

Minh Hoai; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Trong Bang and Nguyen, Phi Le and Lucey, Simon and Hoai, Minh},
  title     = {Region-Level Data Attribution for Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18825--18833},
}
TAPNext: Tracking Any Point (TAP) as Next Token Prediction: Artem Zholus,

Carl Doersch,

Yi Yang,

Skanda Koppula,

Viorica Patraucean,

Xu Owen He,

Ignacio Rocco,

Mehdi S. M. Sajjadi,

Sarath Chandar,

Ross Goroshin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zholus_2025_ICCV,
  author    = {Zholus, Artem and Doersch, Carl and Yang, Yi and Koppula, Skanda and Patraucean, Viorica and He, Xu Owen and Rocco, Ignacio and Sajjadi, Mehdi S. M. and Chandar, Sarath and Goroshin, Ross},
  title     = {{TAPNext}: Tracking Any Point ({TAP}) as Next Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9693--9703},
}
Learnable Feature Patches and Vectors for Boosting Low-light Image Enhancement without External Knowledge: Xiaogang Xu,

Jiafei Wu,

Qingsen Yan,

Jiequan Cui,

Richang Hong,

Bei Yu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiaogang and Wu, Jiafei and Yan, Qingsen and Cui, Jiequan and Hong, Richang and Yu, Bei},
  title     = {Learnable Feature Patches and Vectors for Boosting Low-light Image Enhancement without External Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7761--7770},
}
Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences: Hyojin Bahng,

Caroline Chan,

Fredo Durand,

Phillip Isola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahng_2025_ICCV,
  author    = {Bahng, Hyojin and Chan, Caroline and Durand, Fredo and Isola, Phillip},
  title     = {Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22934--22946},
}
Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild: Peijun Bao,

Chenqi Kong,

Siyuan Yang,

Zihao Shao,

Xinghao Jiang,

Boon Poh Ng,

Meng Hwa Er,

Alex Kot; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Peijun and Kong, Chenqi and Yang, Siyuan and Shao, Zihao and Jiang, Xinghao and Ng, Boon Poh and Er, Meng Hwa and Kot, Alex},
  title     = {{Vid-Group}: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20541--20550},
}
BANet: Bilateral Aggregation Network for Mobile Stereo Matching: Gangwei Xu,

Jiaxin Liu,

Xianqi Wang,

Junda Cheng,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Gangwei and Liu, Jiaxin and Wang, Xianqi and Cheng, Junda and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin},
  title     = {{BANet}: Bilateral Aggregation Network for Mobile Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28870--28880},
}
Diffuman4D: 4D Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models: Yudong Jin,

Sida Peng,

Xuan Wang,

Tao Xie,

Zhen Xu,

Yifan Yang,

Yujun Shen,

Hujun Bao,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Yudong and Peng, Sida and Wang, Xuan and Xie, Tao and Xu, Zhen and Yang, Yifan and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei},
  title     = {{Diffuman4D}: {4D} Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11047--11057},
}
AnnofreeOD: Detecting All Classes at Low Frame Rates Without Human Annotations: Boyi Sun,

Yuhang Liu,

Houxin He,

Yonglin Tian,

Fei-Yue Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Boyi and Liu, Yuhang and He, Houxin and Tian, Yonglin and Wang, Fei-Yue},
  title     = {{AnnofreeOD}: Detecting All Classes at Low Frame Rates Without Human Annotations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5315--5325},
}
AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning: Yiwu Zhong,

Zhuoming Liu,

Yin Li,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yiwu and Liu, Zhuoming and Li, Yin and Wang, Liwei},
  title     = {{AIM}: Adaptive Inference of Multi-Modal {LLMs} via Token Merging and Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20180--20192},
}
PanSt3R: Multi-view Consistent Panoptic Segmentation: Lojze Zust,

Yohann Cabon,

Juliette Marrie,

Leonid Antsfeld,

Boris Chidlovskii,

Jerome Revaud,

Gabriela Csurka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zust_2025_ICCV,
  author    = {Zust, Lojze and Cabon, Yohann and Marrie, Juliette and Antsfeld, Leonid and Chidlovskii, Boris and Revaud, Jerome and Csurka, Gabriela},
  title     = {{PanSt3R}: Multi-view Consistent Panoptic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5856--5866},
}
G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection: Chengyu Tao,

Xuanming Cao,

Juan Du; [pdf] [supp]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Chengyu and Cao, Xuanming and Du, Juan},
  title     = {{G2SF}: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20551--20560},
}
SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World: Chen Chen,

Zhirui Wang,

Taowei Sheng,

Yi Jiang,

Yundu Li,

Peirui Cheng,

Luning Zhang,

Kaiqiang Chen,

Yanfeng Hu,

Xue Yang,

Xian Sun; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chen and Wang, Zhirui and Sheng, Taowei and Jiang, Yi and Li, Yundu and Cheng, Peirui and Zhang, Luning and Chen, Kaiqiang and Hu, Yanfeng and Yang, Xue and Sun, Xian},
  title     = {{SA-Occ}: Satellite-Assisted {3D} Occupancy Prediction in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27021--27030},
}
Generic Event Boundary Detection via Denoising Diffusion: Jaejun Hwang,

Dayoung Gong,

Manjin Kim,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Jaejun and Gong, Dayoung and Kim, Manjin and Cho, Minsu},
  title     = {Generic Event Boundary Detection via Denoising Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14084--14094},
}
ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization: Yuanhe Guo,

Linxi Xie,

Zhuoran Chen,

Kangrui Yu,

Ryan Po,

Guandao Yang,

Gordon Wetzstein,

Hongyi Wen; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yuanhe and Xie, Linxi and Chen, Zhuoran and Yu, Kangrui and Po, Ryan and Yang, Guandao and Wetzstein, Gordon and Wen, Hongyi},
  title     = {{ImageGem}: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19577--19586},
}
Implicit Counterfactual Learning for Audio-Visual Segmentation: Mingfeng Zha,

Tianyu Li,

Guoqing Wang,

Peng Wang,

Yangyang Wu,

Yang Yang,

Heng Tao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zha_2025_ICCV,
  author    = {Zha, Mingfeng and Li, Tianyu and Wang, Guoqing and Wang, Peng and Wu, Yangyang and Yang, Yang and Shen, Heng Tao},
  title     = {Implicit Counterfactual Learning for Audio-Visual Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22349--22360},
}
Context Guided Transformer Entropy Modeling for Video Compression: Junlong Tong,

Wei Zhang,

Yaohui Jin,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Junlong and Zhang, Wei and Jin, Yaohui and Shen, Xiaoyu},
  title     = {Context Guided Transformer Entropy Modeling for Video Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18885--18894},
}
FlowEdit: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models: Vladimir Kulikov,

Matan Kleiner,

Inbar Huberman-Spiegelglas,

Tomer Michaeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulikov_2025_ICCV,
  author    = {Kulikov, Vladimir and Kleiner, Matan and Huberman-Spiegelglas, Inbar and Michaeli, Tomer},
  title     = {{FlowEdit}: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19721--19730},
}
CMT: A Cascade MAR with Topology Predictor for Multimodal Conditional CAD Generation: Jianyu Wu,

Yizhou Wang,

Xiangyu Yue,

Xinzhu Ma,

Jinyang Guo,

Dongzhan Zhou,

Wanli Ouyang,

Shixiang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jianyu and Wang, Yizhou and Yue, Xiangyu and Ma, Xinzhu and Guo, Jinyang and Zhou, Dongzhan and Ouyang, Wanli and Tang, Shixiang},
  title     = {{CMT}: A Cascade {MAR} with Topology Predictor for Multimodal Conditional {CAD} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7014--7024},
}
CLIPSym: Delving into Symmetry Detection with CLIP: Tinghan Yang,

Md Ashiqur Rahman,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Tinghan and Rahman, Md Ashiqur and Yeh, Raymond A.},
  title     = {{CLIPSym}: Delving into Symmetry Detection with {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21003--21013},
}
MRGen: Segmentation Data Engine For Underrepresented MRI Modalities: Haoning Wu,

Ziheng Zhao,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Haoning and Zhao, Ziheng and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {{MRGen}: Segmentation Data Engine For Underrepresented {MRI} Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19903--19913},
}
PrimHOI: Compositional Human-Object Interaction via Reusable Primitives: Kai Jia,

Tengyu Liu,

Mingtao Pei,

Yixin Zhu,

Siyuan Huang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Kai and Liu, Tengyu and Pei, Mingtao and Zhu, Yixin and Huang, Siyuan},
  title     = {{PrimHOI}: Compositional Human-Object Interaction via Reusable Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11491--11501},
}
Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation: Yuxuan Wang,

Xuanyu Yi,

Haohan Weng,

Qingshan Xu,

Xiaokang Wei,

Xianghui Yang,

Chunchao Guo,

Long Chen,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Yi, Xuanyu and Weng, Haohan and Xu, Qingshan and Wei, Xiaokang and Yang, Xianghui and Guo, Chunchao and Chen, Long and Zhang, Hanwang},
  title     = {Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10961--10970},
}
What Changed and What Could Have Changed? State-Change Counterfactuals for Procedure-Aware Video Representation Learning: Chi-Hsi Kung,

Frangil Ramirez,

Juhyung Ha,

Yi-Ting Chen,

David Crandall,

Yi-Hsuan Tsai; [pdf] [supp]
[bibtex]
@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Chi-Hsi and Ramirez, Frangil and Ha, Juhyung and Chen, Yi-Ting and Crandall, David and Tsai, Yi-Hsuan},
  title     = {What Changed and What Could Have Changed? {State-Change} Counterfactuals for Procedure-Aware Video Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12294--12306},
}
Accelerate 3D Object Detection Models via Zero-Shot Attention Key Pruning: Lizhen Xu,

Xiuxiu Bai,

Xiaojun Jia,

Jianwu Fang,

Shanmin Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Lizhen and Bai, Xiuxiu and Jia, Xiaojun and Fang, Jianwu and Pang, Shanmin},
  title     = {Accelerate {3D} Object Detection Models via Zero-Shot Attention Key Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23085--23094},
}
Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation: Tao Lei,

Ziyao Yang,

Xingwu Wang,

Yi Wang,

Xuan Wang,

Feiman Sun,

Asoke K. Nandi; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Tao and Yang, Ziyao and Wang, Xingwu and Wang, Yi and Wang, Xuan and Sun, Feiman and Nandi, Asoke K.},
  title     = {Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21450--21459},
}
X-Dancer: Expressive Music to Human Dance Video Generation: Zeyuan Chen,

Hongyi Xu,

Guoxian Song,

You Xie,

Chenxu Zhang,

Xin Chen,

Chao Wang,

Di Chang,

Linjie Luo; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zeyuan and Xu, Hongyi and Song, Guoxian and Xie, You and Zhang, Chenxu and Chen, Xin and Wang, Chao and Chang, Di and Luo, Linjie},
  title     = {{X-Dancer}: Expressive Music to Human Dance Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10602--10611},
}
IDF: Iterative Dynamic Filtering Networks for Generalizable Image Denoising: Dongjin Kim,

Jaekyun Ko,

Muhammad Kashif Ali,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongjin and Ko, Jaekyun and Ali, Muhammad Kashif and Kim, Tae Hyun},
  title     = {{IDF}: Iterative Dynamic Filtering Networks for Generalizable Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12180--12190},
}
LayerLock: Non-collapsing Representation Learning with Progressive Freezing: Goker Erdogan,

Nikhil Parthasarathy,

Catalin Ionescu,

Drew A. Hudson,

Alexander Lerchner,

Andrew Zisserman,

Mehdi S. M. Sajjadi,

Joao Carreira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Erdogan_2025_ICCV,
  author    = {Erdogan, Goker and Parthasarathy, Nikhil and Ionescu, Catalin and Hudson, Drew A. and Lerchner, Alexander and Zisserman, Andrew and Sajjadi, Mehdi S. M. and Carreira, Joao},
  title     = {{LayerLock}: Non-collapsing Representation Learning with Progressive Freezing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19461--19470},
}
Augmenting Moment Retrieval: Zero-Dependency Two-Stage Learning: Zhengxuan Wei,

Jiajin Tang,

Sibei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Zhengxuan and Tang, Jiajin and Yang, Sibei},
  title     = {Augmenting Moment Retrieval: Zero-Dependency Two-Stage Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3401--3412},
}
Derm1M: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology: Siyuan Yan,

Ming Hu,

Yiwen Jiang,

Xieji Li,

Hao Fei,

Philipp Tschandl,

Harald Kittler,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Siyuan and Hu, Ming and Jiang, Yiwen and Li, Xieji and Fei, Hao and Tschandl, Philipp and Kittler, Harald and Ge, Zongyuan},
  title     = {{Derm1M}: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12681--12690},
}
Expressive Talking Human from Single-Image with Imperfect Priors: Jun Xiang,

Yudong Guo,

Leipeng Hu,

Boyang Guo,

Yancheng Yuan,

Juyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Jun and Guo, Yudong and Hu, Leipeng and Guo, Boyang and Yuan, Yancheng and Zhang, Juyong},
  title     = {Expressive Talking Human from Single-Image with Imperfect Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10398--10409},
}
DADM: Dual Alignment of Domain and Modality for Face Anti-spoofing: Jingyi Yang,

Xun Lin,

Zitong Yu,

Liepiao Zhang,

Xin Liu,

Hui Li,

Xiaochen Yuan,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jingyi and Lin, Xun and Yu, Zitong and Zhang, Liepiao and Liu, Xin and Li, Hui and Yuan, Xiaochen and Cao, Xiaochun},
  title     = {{DADM}: Dual Alignment of Domain and Modality for Face Anti-spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12045--12056},
}
IntroStyle: Training-Free Introspective Style Attribution using Diffusion Features: Anand Kumar,

Jiteng Mu,

Nuno Vasconcelos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Anand and Mu, Jiteng and Vasconcelos, Nuno},
  title     = {{IntroStyle}: Training-Free Introspective Style Attribution using Diffusion Features},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14909--14918},
}
Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model: Chengxu Liu,

Lu Qi,

Jinshan Pan,

Xueming Qian,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chengxu and Qi, Lu and Pan, Jinshan and Qian, Xueming and Yang, Ming-Hsuan},
  title     = {Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14195--14204},
}
Oasis: One Image is All You Need for Multimodal Instruction Data Synthesis: Letian Zhang,

Quan Cui,

Bingchen Zhao,

Cheng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Letian and Cui, Quan and Zhao, Bingchen and Yang, Cheng},
  title     = {Oasis: One Image is All You Need for Multimodal Instruction Data Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3542--3551},
}
Learning Neural Scene Representation from iToF Imaging: Wenjie Chang,

Hanzhi Chang,

Yueyi Zhang,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Wenjie and Chang, Hanzhi and Zhang, Yueyi and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Learning Neural Scene Representation from {iToF} Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27937--27946},
}
GeometryCrafter: Consistent Geometry Estimation for Open-world Videos with Diffusion Priors: Tian-Xing Xu,

Xiangjun Gao,

Wenbo Hu,

Xiaoyu Li,

Song-Hai Zhang,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Tian-Xing and Gao, Xiangjun and Hu, Wenbo and Li, Xiaoyu and Zhang, Song-Hai and Shan, Ying},
  title     = {{GeometryCrafter}: Consistent Geometry Estimation for Open-world Videos with Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6632--6644},
}
Multi-modal Multi-platform Person Re-Identification: Benchmark and Method: Ruiyang Ha,

Songyi Jiang,

Bin Li,

Bikang Pan,

Yihang Zhu,

Junjie Zhang,

Xiatian Zhu,

Shaogang Gong,

Jingya Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Ruiyang and Jiang, Songyi and Li, Bin and Pan, Bikang and Zhu, Yihang and Zhang, Junjie and Zhu, Xiatian and Gong, Shaogang and Wang, Jingya},
  title     = {Multi-modal Multi-platform Person Re-Identification: Benchmark and Method},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10251--10261},
}
CAP: Evaluation of Persuasive and Creative Image Generation: Aysan Aghazadeh,

Adriana Kovashka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aghazadeh_2025_ICCV,
  author    = {Aghazadeh, Aysan and Kovashka, Adriana},
  title     = {{CAP}: Evaluation of Persuasive and Creative Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16970--16980},
}
SpatialTrackerV2: Advancing 3D Point Tracking with Explicit Camera Motion: Yuxi Xiao,

Jianyuan Wang,

Nan Xue,

Nikita Karaev,

Yuri Makarov,

Bingyi Kang,

Xing Zhu,

Hujun Bao,

Yujun Shen,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yuxi and Wang, Jianyuan and Xue, Nan and Karaev, Nikita and Makarov, Yuri and Kang, Bingyi and Zhu, Xing and Bao, Hujun and Shen, Yujun and Zhou, Xiaowei},
  title     = {{SpatialTrackerV2}: Advancing {3D} Point Tracking with Explicit Camera Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6726--6737},
}
monoVLN: Bridging the Observation Gap between Monocular and Panoramic Vision and Language Navigation: Renjie Lu,

Yu Zhou,

Hao Cheng,

Jingke Meng,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Renjie and Zhou, Yu and Cheng, Hao and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{monoVLN}: Bridging the Observation Gap between Monocular and Panoramic Vision and Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9477--9486},
}
Know "No" Better: A Data-Driven Approach for Enhancing Negation Awareness in CLIP: Junsung Park,

Jungbeom Lee,

Jongyoon Song,

Sangwon Yu,

Dahuin Jung,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Junsung and Lee, Jungbeom and Song, Jongyoon and Yu, Sangwon and Jung, Dahuin and Yoon, Sungroh},
  title     = {Know {``No''} Better: A Data-Driven Approach for Enhancing Negation Awareness in {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2825--2835},
}
Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in fMRI-to-Video Reconstruction: Haonan Wang,

Qixiang Zhang,

Lehan Wang,

Xuanqi Huang,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haonan and Zhang, Qixiang and Wang, Lehan and Huang, Xuanqi and Li, Xiaomeng},
  title     = {Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in {fMRI-to-Video} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18367--18376},
}
LACONIC: A 3D Layout Adapter for Controllable Image Creation: Léopold Maillard,

Tom Durand,

Adrien Ramanana Rahary,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maillard_2025_ICCV,
  author    = {Maillard, L{\'e}opold and Durand, Tom and Rahary, Adrien Ramanana and Ovsjanikov, Maks},
  title     = {{LACONIC}: A {3D} Layout Adapter for Controllable Image Creation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18046--18057},
}
InfiniDreamer: Arbitrarily Long Human Motion Generation via Segment Score Distillation: Wenjie Zhuo,

Fan Ma,

Hehe Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Wenjie and Ma, Fan and Fan, Hehe},
  title     = {{InfiniDreamer}: Arbitrarily Long Human Motion Generation via Segment Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14688--14698},
}
Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels: Yujia Tong,

Yuze Wang,

Jingling Yuan,

Chuang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Yujia and Wang, Yuze and Yuan, Jingling and Hu, Chuang},
  title     = {Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20603--20612},
}
FairGen: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions: Yilei Jiang,

Wei-Hong Li,

Yiyuan Zhang,

Minghong Cai,

Xiangyu Yue; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yilei and Li, Wei-Hong and Zhang, Yiyuan and Cai, Minghong and Yue, Xiangyu},
  title     = {{FairGen}: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18411--18420},
}
Gaze-Language Alignment for Zero-Shot Prediction of Visual Search Targets from Human Gaze Scanpaths: Sounak Mondal,

Naveen Sendhilnathan,

Ting Zhang,

Yue Liu,

Michael Proulx,

Michael Louis Iuzzolino,

Chuan Qin,

Tanya R. Jonker; [pdf] [supp]
[bibtex]
@InProceedings{Mondal_2025_ICCV,
  author    = {Mondal, Sounak and Sendhilnathan, Naveen and Zhang, Ting and Liu, Yue and Proulx, Michael and Iuzzolino, Michael Louis and Qin, Chuan and Jonker, Tanya R.},
  title     = {Gaze-Language Alignment for Zero-Shot Prediction of Visual Search Targets from Human Gaze Scanpaths},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2738--2749},
}
ReCoT: Reflective Self-Correction Training for Mitigating Confirmation Bias in Large Vision-Language Models: Mengxue Qu,

Yibo Hu,

Kunyang Han,

Yunchao Wei,

Yao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2025_ICCV,
  author    = {Qu, Mengxue and Hu, Yibo and Han, Kunyang and Wei, Yunchao and Zhao, Yao},
  title     = {{ReCoT}: Reflective Self-Correction Training for Mitigating Confirmation Bias in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9147--9157},
}
Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation: Yuqing Wang,

Zhijie Lin,

Yao Teng,

Yuanzhi Zhu,

Shuhuai Ren,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuqing and Lin, Zhijie and Teng, Yao and Zhu, Yuanzhi and Ren, Shuhuai and Feng, Jiashi and Liu, Xihui},
  title     = {Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18596--18605},
}
Trade-offs in Image Generation: How Do Different Dimensions Interact?: Sicheng Zhang,

Binzhu Xie,

Zhonghao Yan,

Yuli Zhang,

Donghao Zhou,

Xiaofei Chen,

Shi Qiu,

Jiaqi Liu,

Guoyang Xie,

Zhichao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Sicheng and Xie, Binzhu and Yan, Zhonghao and Zhang, Yuli and Zhou, Donghao and Chen, Xiaofei and Qiu, Shi and Liu, Jiaqi and Xie, Guoyang and Lu, Zhichao},
  title     = {Trade-offs in Image Generation: How Do Different Dimensions Interact?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17256--17267},
}
From Easy to Hard: The MIR Benchmark for Progressive Interleaved Multi-Image Reasoning: Hang Du,

Jiayang Zhang,

Guoshun Nan,

Wendi Deng,

Zhenyan Chen,

Chenyang Zhang,

Wang Xiao,

Shan Huang,

Yuqi Pan,

Tao Qi,

Sicong Leng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Hang and Zhang, Jiayang and Nan, Guoshun and Deng, Wendi and Chen, Zhenyan and Zhang, Chenyang and Xiao, Wang and Huang, Shan and Pan, Yuqi and Qi, Tao and Leng, Sicong},
  title     = {From Easy to Hard: The {MIR} Benchmark for Progressive Interleaved Multi-Image Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {859--869},
}
Rethinking Layered Graphic Design Generation with a Top-Down Approach: Jingye Chen,

Zhaowen Wang,

Nanxuan Zhao,

Li Zhang,

Difan Liu,

Jimei Yang,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jingye and Wang, Zhaowen and Zhao, Nanxuan and Zhang, Li and Liu, Difan and Yang, Jimei and Chen, Qifeng},
  title     = {Rethinking Layered Graphic Design Generation with a Top-Down Approach},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16861--16870},
}
SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images: Yichi Zhang,

Le Xue,

Wenbo Zhang,

Lanlan Li,

Yuchen Liu,

Chen Jiang,

Yuan Cheng,

Yuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yichi and Xue, Le and Zhang, Wenbo and Li, Lanlan and Liu, Yuchen and Jiang, Chen and Cheng, Yuan and Qi, Yuan},
  title     = {{SegAnyPET}: Universal Promptable Segmentation from Positron Emission Tomography Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21107--21116},
}
StolenLoRA: Exploring LoRA Extraction Attacks via Synthetic Data: Yixu Wang,

Yan Teng,

Yingchun Wang,

Xingjun Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yixu and Teng, Yan and Wang, Yingchun and Ma, Xingjun},
  title     = {{StolenLoRA}: Exploring {LoRA} Extraction Attacks via Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {263--272},
}
The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation: Aoxiong Yin,

Xu Tan,

Kai Shen,

Yichong Leng,

Xinyu Zhou,

Juncheng Li,

Siliang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Aoxiong and Tan, Xu and Shen, Kai and Leng, Yichong and Zhou, Xinyu and Li, Juncheng and Tang, Siliang},
  title     = {The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15604--15615},
}
SV4D 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality 4D Generation: Chun-Han Yao,

Yiming Xie,

Vikram Voleti,

Huaizu Jiang,

Varun Jampani; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Chun-Han and Xie, Yiming and Voleti, Vikram and Jiang, Huaizu and Jampani, Varun},
  title     = {{SV4D} 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality {4D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13248--13258},
}
Long Context Tuning for Video Generation: Yuwei Guo,

Ceyuan Yang,

Ziyan Yang,

Zhibei Ma,

Zhijie Lin,

Zhenheng Yang,

Dahua Lin,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yuwei and Yang, Ceyuan and Yang, Ziyan and Ma, Zhibei and Lin, Zhijie and Yang, Zhenheng and Lin, Dahua and Jiang, Lu},
  title     = {Long Context Tuning for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17281--17291},
}
TCFG: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration: Xiaomeng Fu,

Jia Li; [pdf]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Xiaomeng and Li, Jia},
  title     = {{TCFG}: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18552--18562},
}
Removing Out-of-Focus Reflective Flares via Color Alignment: Fengbo Lan,

Chang Wen Chen; [pdf]
[bibtex]
@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Fengbo and Chen, Chang Wen},
  title     = {Removing Out-of-Focus Reflective Flares via Color Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9770--9779},
}
ILLUME: Illuminating Your LLMs to See, Draw, and Self-Enhance: Chunwei Wang,

Guansong Lu,

Junwei Yang,

Runhui Huang,

Jianhua Han,

Lu Hou,

Wei Zhang,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chunwei and Lu, Guansong and Yang, Junwei and Huang, Runhui and Han, Jianhua and Hou, Lu and Zhang, Wei and Xu, Hang},
  title     = {{ILLUME}: Illuminating Your {LLMs} to See, Draw, and Self-Enhance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21612--21622},
}
No Pose at All: Self-Supervised Pose-Free 3D Gaussian Splatting from Sparse Views: Ranran Huang,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ranran and Mikolajczyk, Krystian},
  title     = {No Pose at All: Self-Supervised Pose-Free {3D} {Gaussian} Splatting from Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27947--27957},
}
Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation: Uzay Gökay,

Federico Spurio,

Dominik R. Bach,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Gokay_2025_ICCV,
  author    = {G{\"o}kay, Uzay and Spurio, Federico and Bach, Dominik R. and Gall, Juergen},
  title     = {Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12101--12111},
}
Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification: Wenkui Yang,

Jie Cao,

Junxian Duan,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wenkui and Cao, Jie and Duan, Junxian and He, Ran},
  title     = {Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19290--19300},
}
EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-based Online Scene Understanding: Yuqi Wu,

Wenzhao Zheng,

Sicheng Zuo,

Yuanhui Huang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yuqi and Zheng, Wenzhao and Zuo, Sicheng and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{EmbodiedOcc}: Embodied {3D} Occupancy Prediction for Vision-based Online Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26360--26370},
}
MAVias: Mitigate any Visual Bias: Ioannis Sarridis,

Christos Koutlis,

Symeon Papadopoulos,

Christos Diou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarridis_2025_ICCV,
  author    = {Sarridis, Ioannis and Koutlis, Christos and Papadopoulos, Symeon and Diou, Christos},
  title     = {{MAVias}: Mitigate any Visual Bias},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1271--1281},
}
Calibrating MLLM-as-a-judge via Multimodal Bayesian Prompt Ensembles: Eric Slyman,

Mehrab Tanjim,

Kushal Kafle,

Stefan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Slyman_2025_ICCV,
  author    = {Slyman, Eric and Tanjim, Mehrab and Kafle, Kushal and Lee, Stefan},
  title     = {Calibrating {MLLM-as-a-judge} via Multimodal {Bayesian} Prompt Ensembles},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17224--17234},
}
SCFlow: Implicitly Learning Style and Content Disentanglement with Flow Models: Pingchuan Ma,

Xiaopei Yang,

Yusong Li,

Ming Gui,

Felix Krause,

Johannes Schusterbauer,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Pingchuan and Yang, Xiaopei and Li, Yusong and Gui, Ming and Krause, Felix and Schusterbauer, Johannes and Ommer, Bj{\"o}rn},
  title     = {{SCFlow}: Implicitly Learning Style and Content Disentanglement with Flow Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14919--14929},
}
Lay-Your-Scene: Natural Scene Layout Generation with Diffusion Transformers: Divyansh Srivastava,

Xiang Zhang,

He Wen,

Chenru Wen,

Zhuowen Tu; [pdf] [supp]
[bibtex]
@InProceedings{Srivastava_2025_ICCV,
  author    = {Srivastava, Divyansh and Zhang, Xiang and Wen, He and Wen, Chenru and Tu, Zhuowen},
  title     = {{Lay-Your-Scene}: Natural Scene Layout Generation with Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17909--17919},
}
DiST-4D: Disentangled Spatiotemporal Diffusion with Metric Depth for 4D Driving Scene Generation: Jiazhe Guo,

Yikang Ding,

Xiwu Chen,

Shuo Chen,

Bohan Li,

Yingshuang Zou,

Xiaoyang Lyu,

Feiyang Tan,

Xiaojuan Qi,

Zhiheng Li,

Hao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jiazhe and Ding, Yikang and Chen, Xiwu and Chen, Shuo and Li, Bohan and Zou, Yingshuang and Lyu, Xiaoyang and Tan, Feiyang and Qi, Xiaojuan and Li, Zhiheng and Zhao, Hao},
  title     = {{DiST-4D}: Disentangled Spatiotemporal Diffusion with Metric Depth for {4D} Driving Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27231--27241},
}
Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection: Xuehan Chen,

Guangyu Ren,

Tianhong Dai,

Tania Stathaki,

Hengyan Liu; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xuehan and Ren, Guangyu and Dai, Tianhong and Stathaki, Tania and Liu, Hengyan},
  title     = {Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20672--20682},
}
Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors: Shida Sun,

Yue Li,

Yueyi Zhang,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shida and Li, Yue and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25040--25049},
}
Contrastive Flow Matching: George Stoica,

Vivek Ramanujan,

Xiang Fan,

Ali Farhadi,

Ranjay Krishna,

Judy Hoffman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stoica_2025_ICCV,
  author    = {Stoica, George and Ramanujan, Vivek and Fan, Xiang and Farhadi, Ali and Krishna, Ranjay and Hoffman, Judy},
  title     = {Contrastive Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1185--1194},
}
Multi-scenario Overlapping Text Segmentation with Depth Awareness: Yang Liu,

Xudong Xie,

Yuliang Liu,

Xiang Bai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Xie, Xudong and Liu, Yuliang and Bai, Xiang},
  title     = {Multi-scenario Overlapping Text Segmentation with Depth Awareness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17454--17463},
}
SemiVisBooster: Boosting Semi-Supervised Learning for Fine-Grained Classification through Pseudo-Label Semantic Guidance: Wenjin Zhang,

Xinyu Li,

Chenyang Gao,

Ivan Marsic; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenjin and Li, Xinyu and Gao, Chenyang and Marsic, Ivan},
  title     = {{SemiVisBooster}: Boosting Semi-Supervised Learning for Fine-Grained Classification through Pseudo-Label Semantic Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1195--1204},
}
Backdooring Self-Supervised Contrastive Learning by Noisy Alignment: Tuo Chen,

Jie Gui,

Minjing Dong,

Ju Jia,

Lanting Fang,

Jian Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Tuo and Gui, Jie and Dong, Minjing and Jia, Ju and Fang, Lanting and Liu, Jian},
  title     = {Backdooring Self-Supervised Contrastive Learning by Noisy Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3684--3693},
}
VisualCloze: A Universal Image Generation Framework via Visual In-Context Learning: Zhong-Yu Li,

Ruoyi Du,

Juncheng Yan,

Le Zhuo,

Zhen Li,

Peng Gao,

Zhanyu Ma,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhong-Yu and Du, Ruoyi and Yan, Juncheng and Zhuo, Le and Li, Zhen and Gao, Peng and Ma, Zhanyu and Cheng, Ming-Ming},
  title     = {{VisualCloze}: A Universal Image Generation Framework via Visual In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18969--18979},
}
MagicHOI: Leveraging 3D Priors for Accurate Hand-object Reconstruction from Short Monocular Video Clips: Shibo Wang,

Haonan He,

Maria Parelli,

Christoph Gebhardt,

Zicong Fan,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shibo and He, Haonan and Parelli, Maria and Gebhardt, Christoph and Fan, Zicong and Song, Jie},
  title     = {{MagicHOI}: Leveraging {3D} Priors for Accurate Hand-object Reconstruction from Short Monocular Video Clips},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5957--5968},
}
ReMP-AD: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection: Hongchi Ma,

Guanglei Yang,

Debin Zhao,

Yanli Ji,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Hongchi and Yang, Guanglei and Zhao, Debin and Ji, Yanli and Zuo, Wangmeng},
  title     = {{ReMP-AD}: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20425--20434},
}
Trial-Oriented Visual Rearrangement: Yuyi Liu,

Xinhang Song,

Tianliang Qi,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuyi and Song, Xinhang and Qi, Tianliang and Jiang, Shuqiang},
  title     = {Trial-Oriented Visual Rearrangement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8022--8031},
}
TOGA: Temporally Grounded Open-Ended Video QA with Weak Supervision: Ayush Gupta,

Anirban Roy,

Rama Chellappa,

Nathaniel D. Bastian,

Alvaro Velasquez,

Susmit Jha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Ayush and Roy, Anirban and Chellappa, Rama and Bastian, Nathaniel D. and Velasquez, Alvaro and Jha, Susmit},
  title     = {{TOGA}: Temporally Grounded Open-Ended Video {QA} with Weak Supervision},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23593--23603},
}
Multi-Object Sketch Animation by Scene Decomposition and Motion Planning: Jingyu Liu,

Zijie Xin,

Yuhan Fu,

Ruixiang Zhao,

Bangxiang Lan,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jingyu and Xin, Zijie and Fu, Yuhan and Zhao, Ruixiang and Lan, Bangxiang and Li, Xirong},
  title     = {Multi-Object Sketch Animation by Scene Decomposition and Motion Planning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11537--11546},
}
MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding: Rongchang Xie,

Chen Du,

Ping Song,

Chang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Rongchang and Du, Chen and Song, Ping and Liu, Chang},
  title     = {{MUSE-VL}: Modeling Unified {VLM} through Semantic Discrete Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24135--24146},
}
MistSense: Versatile Online Detection of Procedural and Execution Mistakes: Constantin Patsch,

Yuankai Wu,

Marsil Zakour,

Driton Salihu,

Eckehard Steinbach; [pdf]
[bibtex]
@InProceedings{Patsch_2025_ICCV,
  author    = {Patsch, Constantin and Wu, Yuankai and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard},
  title     = {{MistSense}: Versatile Online Detection of Procedural and Execution Mistakes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14528--14537},
}
FusionPhys: A Flexible Framework for Fusing Complementary Sensing Modalities in Remote Physiological Measurement: Chenhang Ying,

Huiyu Yang,

Jieyi Ge,

Zhaodong Sun,

Xu Cheng,

Kui Ren,

Xiaobai Li; [pdf]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Chenhang and Yang, Huiyu and Ge, Jieyi and Sun, Zhaodong and Cheng, Xu and Ren, Kui and Li, Xiaobai},
  title     = {{FusionPhys}: A Flexible Framework for Fusing Complementary Sensing Modalities in Remote Physiological Measurement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9363--9373},
}
Task Vector Quantization for Memory-Efficient Model Merging: Youngeun Kim,

Seunghwan Lee,

Aecheon Jung,

Bogon Ryu,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Youngeun and Lee, Seunghwan and Jung, Aecheon and Ryu, Bogon and Hong, Sungeun},
  title     = {Task Vector Quantization for Memory-Efficient Model Merging},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20105--20115},
}
Dirichlet-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration: Baoyou Chen,

Ce Liu,

Weihao Yuan,

Zilong Dong,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Baoyou and Liu, Ce and Yuan, Weihao and Dong, Zilong and Zhu, Siyu},
  title     = {{Dirichlet}-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14507--14516},
}
Dual-Expert Consistency Model for Efficient and High-Quality Video Generation: Zhengyao Lv,

Chenyang Si,

Tianlin Pan,

Zhaoxi Chen,

Kwan-Yee K. Wong,

Yu Qiao,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_ICCV,
  author    = {Lv, Zhengyao and Si, Chenyang and Pan, Tianlin and Chen, Zhaoxi and Wong, Kwan-Yee K. and Qiao, Yu and Liu, Ziwei},
  title     = {Dual-Expert Consistency Model for Efficient and High-Quality Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14983--14993},
}
GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer: Xin Jin,

Haisheng Su,

Cong Ma,

Kai Liu,

Wei Wu,

Fei Hui,

Junchi Yan; [pdf]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Xin and Su, Haisheng and Ma, Cong and Liu, Kai and Wu, Wei and Hui, Fei and Yan, Junchi},
  title     = {{GeoFormer}: Geometry Point Encoder for {3D} Object Detection with Graph-based Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26879--26889},
}
Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving: Junhao Ge,

Zuhong Liu,

Longteng Fan,

Yifan Jiang,

Jiaqi Su,

Yiming Li,

Zhejun Zhang,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Junhao and Liu, Zuhong and Fan, Longteng and Jiang, Yifan and Su, Jiaqi and Li, Yiming and Zhang, Zhejun and Chen, Siheng},
  title     = {Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28859--28869},
}
Dense2MoE: Restructuring Diffusion Transformer to MoE for Efficient Text-to-Image Generation: Youwei Zheng,

Yuxi Ren,

Xin Xia,

Xuefeng Xiao,

Xiaohua Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Youwei and Ren, Yuxi and Xia, Xin and Xiao, Xuefeng and Xie, Xiaohua},
  title     = {{Dense2MoE}: Restructuring Diffusion Transformer to {MoE} for Efficient Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18661--18670},
}
KinMo: Kinematic-aware Human Motion Understanding and Generation: Pengfei Zhang,

Pinxin Liu,

Pablo Garrido,

Hyeongwoo Kim,

Bindita Chaudhuri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Pengfei and Liu, Pinxin and Garrido, Pablo and Kim, Hyeongwoo and Chaudhuri, Bindita},
  title     = {{KinMo}: Kinematic-aware Human Motion Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11187--11197},
}
NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation: Ying-Tian Liu,

Jiajun Li,

Yu-Tao Liu,

Xin Yu,

Yuan-Chen Guo,

Yan-Pei Cao,

Ding Liang,

Ariel Shamir,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ying-Tian and Li, Jiajun and Liu, Yu-Tao and Yu, Xin and Guo, Yuan-Chen and Cao, Yan-Pei and Liang, Ding and Shamir, Ariel and Zhang, Song-Hai},
  title     = {{NeuFrameQ}: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28000--28009},
}
Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing: Yudong Liu,

Jingwei Sun,

Yueqian Lin,

Jianyi Zhang,

Jingyang Zhang,

Ming Yin,

Qinsi Wang,

Hai Li,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yudong and Sun, Jingwei and Lin, Yueqian and Zhang, Jianyi and Zhang, Jingyang and Yin, Ming and Wang, Qinsi and Li, Hai and Chen, Yiran},
  title     = {Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20802--20811},
}
LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos: Chin-Yang Lin,

Cheng Sun,

Fu-En Yang,

Min-Hung Chen,

Yen-Yu Lin,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chin-Yang and Sun, Cheng and Yang, Fu-En and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun},
  title     = {{LongSplat}: Robust Unposed {3D} {Gaussian} Splatting for Casual Long Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27412--27422},
}
AVAM: a Universal Training-free Adaptive Visual Anchoring Embedded into Multimodal Large Language Model for Multi-image Question Answering: Kang Zeng,

Guojin Zhong,

Jintao Cheng,

Jin Yuan,

Zhiyong Li; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Kang and Zhong, Guojin and Cheng, Jintao and Yuan, Jin and Li, Zhiyong},
  title     = {{AVAM}: a Universal Training-free Adaptive Visual Anchoring Embedded into Multimodal Large Language Model for Multi-image Question Answering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2292--2302},
}
WikiAutoGen: Towards Multi-Modal Wikipedia-Style Article Generation: Zhongyu Yang,

Jun Chen,

Dannong Xu,

Junjie Fei,

Xiaoqian Shen,

Liangbing Zhao,

Chun-Mei Feng,

Mohamed Elhoseiny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhongyu and Chen, Jun and Xu, Dannong and Fei, Junjie and Shen, Xiaoqian and Zhao, Liangbing and Feng, Chun-Mei and Elhoseiny, Mohamed},
  title     = {{WikiAutoGen}: Towards Multi-Modal {Wikipedia}-Style Article Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15532--15541},
}
GMMamba: Group Masking Mamba for Whole Slide Image Classification: Tingting Zheng,

Hongxun Yao,

Kui Jiang,

Yi Xiao,

Sicheng Zhao; [pdf]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Tingting and Yao, Hongxun and Jiang, Kui and Xiao, Yi and Zhao, Sicheng},
  title     = {{GMMamba}: Group Masking {Mamba} for Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9935--9944},
}
S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction: Guangting Zheng,

Jiajun Deng,

Xiaomeng Chu,

Yu Yuan,

Houqiang Li,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Guangting and Deng, Jiajun and Chu, Xiaomeng and Yuan, Yu and Li, Houqiang and Zhang, Yanyong},
  title     = {{S3R-GS}: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25594--25604},
}
LIRA: Reasoning Reconstruction via Multimodal Large Language Models: Zhen Zhou,

Tong Wang,

Yunkai Ma,

Xiao Tan,

Fengshui Jing; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhen and Wang, Tong and Ma, Yunkai and Tan, Xiao and Jing, Fengshui},
  title     = {{LIRA}: Reasoning Reconstruction via Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1762--1772},
}
Generate, Transduct, Adapt: Iterative Transduction with VLMs: Oindrila Saha,

Logan Lawrence,

Grant Van Horn,

Subhransu Maji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saha_2025_ICCV,
  author    = {Saha, Oindrila and Lawrence, Logan and Van Horn, Grant and Maji, Subhransu},
  title     = {Generate, Transduct, Adapt: Iterative Transduction with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1369--1379},
}
Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations: Xiang Xu,

Lingdong Kong,

Song Wang,

Chuanwei Zhou,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiang and Kong, Lingdong and Wang, Song and Zhou, Chuanwei and Liu, Qingshan},
  title     = {Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better {LiDAR} Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25506--25518},
}
Test-Time Retrieval-Augmented Adaptation for Vision-Language Models: Xinqi Fan,

Xueli Chen,

Luoxiao Yang,

Chuin Hong Yap,

Rizwan Qureshi,

Qi Dou,

Moi Hoon Yap,

Mubarak Shah; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Xinqi and Chen, Xueli and Yang, Luoxiao and Yap, Chuin Hong and Qureshi, Rizwan and Dou, Qi and Yap, Moi Hoon and Shah, Mubarak},
  title     = {Test-Time Retrieval-Augmented Adaptation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8810--8819},
}
Causal Disentanglement and Cross-Modal Alignment for Enhanced Few-Shot Learning: Tianjiao Jiang,

Zhen Zhang,

Yuhang Liu,

Javen Qinfeng Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Tianjiao and Zhang, Zhen and Liu, Yuhang and Shi, Javen Qinfeng},
  title     = {Causal Disentanglement and Cross-Modal Alignment for Enhanced Few-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {890--900},
}
Seeing and Seeing Through the Glass: Real and Synthetic Data for Multi-Layer Depth Estimation: Hongyu Wen,

Yiming Zuo,

Venkat Subramanian,

Patrick Chen,

Jia Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Hongyu and Zuo, Yiming and Subramanian, Venkat and Chen, Patrick and Deng, Jia},
  title     = {Seeing and Seeing Through the Glass: Real and Synthetic Data for Multi-Layer Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6715--6725},
}
GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments: Lin Zeng,

Boming Zhao,

Jiarui Hu,

Xujie Shen,

Ziqiang Dang,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Lin and Zhao, Boming and Hu, Jiarui and Shen, Xujie and Dang, Ziqiang and Bao, Hujun and Cui, Zhaopeng},
  title     = {{GaussianUpdate}: Continual {3D} {Gaussian} Splatting Update for Changing Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25800--25809},
}
NavMorph: A Self-Evolving World Model for Vision-and-Language Navigation in Continuous Environments: Xuan Yao,

Junyu Gao,

Changsheng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Xuan and Gao, Junyu and Xu, Changsheng},
  title     = {{NavMorph}: A Self-Evolving World Model for Vision-and-Language Navigation in Continuous Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5536--5546},
}
SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark: Alex Costanzino,

Pierluigi Zama Ramirez,

Luigi Lella,

Matteo Ragaglia,

Alessandro Oliva,

Giuseppe Lisanti,

Luigi Di Stefano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2025_ICCV,
  author    = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lella, Luigi and Ragaglia, Matteo and Oliva, Alessandro and Lisanti, Giuseppe and Di Stefano, Luigi},
  title     = {{SiM3D}: Single-instance Multiview Multimodal and Multisetup {3D} Anomaly Detection Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20944--20953},
}
Distilling Diffusion Models to Efficient 3D LiDAR Scene Completion: Shengyuan Zhang,

An Zhao,

Ling Yang,

Zejian Li,

Chenye Meng,

Haoran Xu,

Tianrun Chen,

AnYang Wei,

Perry Pengyun Gu,

Lingyun Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shengyuan and Zhao, An and Yang, Ling and Li, Zejian and Meng, Chenye and Xu, Haoran and Chen, Tianrun and Wei, AnYang and Gu, Perry Pengyun and Sun, Lingyun},
  title     = {Distilling Diffusion Models to Efficient {3D} {LiDAR} Scene Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5007--5016},
}
Self-supervised Learning of Hybrid Part-aware 3D Representations of 2D Gaussians and Superquadrics: Zhirui Gao,

Renjiao Yi,

Yuhang Huang,

Wei Chen,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhirui and Yi, Renjiao and Huang, Yuhang and Chen, Wei and Zhu, Chenyang and Xu, Kai},
  title     = {Self-supervised Learning of Hybrid Part-aware {3D} Representations of {2D} {Gaussians} and Superquadrics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9649--9659},
}
Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior: Young Seok Jeon,

Hongfei Yang,

Huazhu Fu,

Mengling Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2025_ICCV,
  author    = {Jeon, Young Seok and Yang, Hongfei and Fu, Huazhu and Feng, Mengling},
  title     = {Teaching {AI} the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24024--24033},
}
OCK: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics: Yeon-Ji Song,

Jaein Kim,

Suhyung Choi,

Jin-Hwa Kim,

Byoung-Tak Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yeon-Ji and Kim, Jaein and Choi, Suhyung and Kim, Jin-Hwa and Zhang, Byoung-Tak},
  title     = {{OCK}: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11359--11368},
}
Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning: Saemi Moon,

Minjong Lee,

Sangdon Park,

Dongwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, Saemi and Lee, Minjong and Park, Sangdon and Kim, Dongwoo},
  title     = {Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16356--16366},
}
AutoComPose: Automatic Generation of Pose Transition Descriptions for Composed Pose Retrieval Using Multimodal LLMs: Yi-Ting Shen,

Sungmin Eum,

Doheon Lee,

Rohit Shete,

Chiao-Yi Wang,

Heesung Kwon,

Shuvra S. Bhattacharyya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Yi-Ting and Eum, Sungmin and Lee, Doheon and Shete, Rohit and Wang, Chiao-Yi and Kwon, Heesung and Bhattacharyya, Shuvra S.},
  title     = {{AutoComPose}: Automatic Generation of Pose Transition Descriptions for Composed Pose Retrieval Using Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7409--7418},
}
WorldScore: A Unified Evaluation Benchmark for World Generation: Haoyi Duan,

Hong-Xing Yu,

Sirui Chen,

Li Fei-Fei,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Haoyi and Yu, Hong-Xing and Chen, Sirui and Fei-Fei, Li and Wu, Jiajun},
  title     = {{WorldScore}: A Unified Evaluation Benchmark for World Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27713--27724},
}
PhysTwin: Physics-Informed Reconstruction and Simulation of Deformable Objects from Videos: Hanxiao Jiang,

Hao-Yu Hsu,

Kaifeng Zhang,

Hsin-Ni Yu,

Shenlong Wang,

Yunzhu Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Hanxiao and Hsu, Hao-Yu and Zhang, Kaifeng and Yu, Hsin-Ni and Wang, Shenlong and Li, Yunzhu},
  title     = {{PhysTwin}: Physics-Informed Reconstruction and Simulation of Deformable Objects from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7219--7230},
}
Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer: Yuansheng Li,

Yunhao Zou,

Linwei Chen,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuansheng and Zou, Yunhao and Chen, Linwei and Fu, Ying},
  title     = {Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13815--13825},
}
EvaGaussians: Event Stream Assisted Gaussian Splatting from Blurry Images: Wangbo Yu,

Chaoran Feng,

Jianing Li,

Jiye Tang,

Jiashu Yang,

Zhenyu Tang,

Meng Cao,

Xu Jia,

Yuchao Yang,

Li Yuan,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wangbo and Feng, Chaoran and Li, Jianing and Tang, Jiye and Yang, Jiashu and Tang, Zhenyu and Cao, Meng and Jia, Xu and Yang, Yuchao and Yuan, Li and Tian, Yonghong},
  title     = {{EvaGaussians}: Event Stream Assisted {Gaussian} Splatting from Blurry Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24780--24790},
}
RainbowPrompt: Diversity-Enhanced Prompt-Evolving for Continual Learning: Kiseong Hong,

Gyeong-hyeon Kim,

Eunwoo Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Kiseong and Kim, Gyeong-hyeon and Kim, Eunwoo},
  title     = {{RainbowPrompt}: Diversity-Enhanced Prompt-Evolving for Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1130--1140},
}
OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models: Gaojie Lin,

Jianwen Jiang,

Jiaqi Yang,

Zerong Zheng,

Chao Liang,

Yuan Zhang,

Jingtuo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Gaojie and Jiang, Jianwen and Yang, Jiaqi and Zheng, Zerong and Liang, Chao and Zhang, Yuan and Liu, Jingtuo},
  title     = {{OmniHuman-1}: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13847--13858},
}
"Principal Components" Enable A New Language of Images: Xin Wen,

Bingchen Zhao,

Ismail Elezi,

Jiankang Deng,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Xin and Zhao, Bingchen and Elezi, Ismail and Deng, Jiankang and Qi, Xiaojuan},
  title     = {{``Principal} Components'' Enable A New Language of Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16641--16651},
}
Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention: Jeonghoon Park,

Juyoung Lee,

Chaeyeon Chung,

Jaeseong Lee,

Jaegul Choo,

Jindong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jeonghoon and Lee, Juyoung and Chung, Chaeyeon and Lee, Jaeseong and Choo, Jaegul and Gu, Jindong},
  title     = {Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17567--17576},
}
GM-MoE: Low-Light Enhancement with Gated-Mechanism Mixture-of-Experts: Minwen Liao,

Haobo Dong,

Xinyi Wang,

Kurban Ubul,

Yihua Shao,

Ziyang Yan; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Minwen and Dong, Haobo and Wang, Xinyi and Ubul, Kurban and Shao, Yihua and Yan, Ziyang},
  title     = {{GM-MoE}: Low-Light Enhancement with Gated-Mechanism Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8766--8776},
}
Beyond Low-Rank Tuning: Model Prior-Guided Rank Allocation for Effective Transfer in Low-Data and Large-Gap Regimes.: Chuyan Zhang,

Kefan Wang,

Yun Gu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Chuyan and Wang, Kefan and Gu, Yun},
  title     = {Beyond Low-Rank Tuning: Model Prior-Guided Rank Allocation for Effective Transfer in Low-Data and Large-Gap Regimes.},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3337--3345},
}
Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion: Byeonghun Lee,

Hyunmin Cho,

Hong Gyu Choi,

Soo Min Kang,

Iljun Ahn,

Kyong Hwan Jin; [pdf]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Byeonghun and Cho, Hyunmin and Choi, Hong Gyu and Kang, Soo Min and Ahn, Iljun and Jin, Kyong Hwan},
  title     = {Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10764--10774},
}
Backdoor Mitigation by Distance-Driven Detoxification: Shaokui Wei,

Jiayin Liu,

Hongyuan Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shaokui and Liu, Jiayin and Zha, Hongyuan},
  title     = {Backdoor Mitigation by Distance-Driven Detoxification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4465--4474},
}
FakeRadar: Probing Forgery Outliers to Detect Unknown Deepfake Videos: Zhaolun Li,

Jichang Li,

Yinqi Cai,

Junye Chen,

Xiaonan Luo,

Guanbin Li,

Rushi Lan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhaolun and Li, Jichang and Cai, Yinqi and Chen, Junye and Luo, Xiaonan and Li, Guanbin and Lan, Rushi},
  title     = {{FakeRadar}: Probing Forgery Outliers to Detect Unknown Deepfake Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13382--13392},
}
CuMPerLay: Learning Cubical Multiparameter Persistence Vectorizations: Caner Korkmaz,

Brighton Nuwagira,

Baris Coskunuzer,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Korkmaz_2025_ICCV,
  author    = {Korkmaz, Caner and Nuwagira, Brighton and Coskunuzer, Baris and Birdal, Tolga},
  title     = {{CuMPerLay}: Learning Cubical Multiparameter Persistence Vectorizations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27084--27094},
}
Towards Privacy-preserved Pre-training of Remote Sensing Foundation Models with Federated Mutual-guidance Learning: Jieyi Tan,

Chengwei Zhang,

Bo Dang,

Yansheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Jieyi and Zhang, Chengwei and Dang, Bo and Li, Yansheng},
  title     = {Towards Privacy-preserved Pre-training of Remote Sensing Foundation Models with Federated Mutual-guidance Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1804--1814},
}
SynTag: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking: Han Fang,

Kejiang Chen,

Zehua Ma,

Jiajun Deng,

Yicong Li,

Weiming Zhang,

Ee-Chien Chang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Han and Chen, Kejiang and Ma, Zehua and Deng, Jiajun and Li, Yicong and Zhang, Weiming and Chang, Ee-Chien},
  title     = {{SynTag}: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15416--15425},
}
Recovering Parametric Scenes from Very Few Time-of-Flight Pixels: Carter Sifferman,

Yiquan Li,

Yiming Li,

Fangzhou Mu,

Michael Gleicher,

Mohit Gupta,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sifferman_2025_ICCV,
  author    = {Sifferman, Carter and Li, Yiquan and Li, Yiming and Mu, Fangzhou and Gleicher, Michael and Gupta, Mohit and Li, Yin},
  title     = {Recovering Parametric Scenes from Very Few Time-of-Flight Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27989--27999},
}
DIMCIM: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models: Revant Teotia,

Candace Ross,

Karen Ullrich,

Sumit Chopra,

Adriana Romero-Soriano,

Melissa Hall,

Matthew Muckley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teotia_2025_ICCV,
  author    = {Teotia, Revant and Ross, Candace and Ullrich, Karen and Chopra, Sumit and Romero-Soriano, Adriana and Hall, Melissa and Muckley, Matthew},
  title     = {{DIMCIM}: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16431--16440},
}
SAUCE: Selective Concept Unlearning in Vision-Language Models with Sparse Autoencoders: Jiahui Geng,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2025_ICCV,
  author    = {Geng, Jiahui and Li, Qing},
  title     = {{SAUCE}: Selective Concept Unlearning in Vision-Language Models with Sparse Autoencoders},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3023--3033},
}
ForCenNet: Foreground-Centric Network for Document Image Rectification: Peng Cai,

Qiang Li,

Kaicheng Yang,

Dong Guo,

Jia Li,

Nan Zhou,

Xiang An,

Ninghua Yang,

Jiankang Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Peng and Li, Qiang and Yang, Kaicheng and Guo, Dong and Li, Jia and Zhou, Nan and An, Xiang and Yang, Ninghua and Deng, Jiankang},
  title     = {{ForCenNet}: Foreground-Centric Network for Document Image Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15137--15146},
}
Adversarial Data Augmentation for Single Domain Generalization via Lyapunov Exponent-Guided Optimization: Zuyu Zhang,

Ning Chen,

Yongshan Liu,

Qinghua Zhang,

Xu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zuyu and Chen, Ning and Liu, Yongshan and Zhang, Qinghua and Zhang, Xu},
  title     = {Adversarial Data Augmentation for Single Domain Generalization via {Lyapunov} Exponent-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {552--561},
}
Always Skip Attention: Yiping Ji,

Hemanth Saratchandran,

Peyman Moghadam,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yiping and Saratchandran, Hemanth and Moghadam, Peyman and Lucey, Simon},
  title     = {Always Skip Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23115--23123},
}
ExCap3D: Expressive 3D Scene Understanding via Object Captioning with Varying Detail: Chandan Yeshwanth,

Dávid Rozenberszki,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeshwanth_2025_ICCV,
  author    = {Yeshwanth, Chandan and Rozenberszki, D{\'a}vid and Dai, Angela},
  title     = {{ExCap3D}: Expressive {3D} Scene Understanding via Object Captioning with Varying Detail},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21699--21709},
}
CMAD: Correlation-Aware and Modalities-Aware Distillation for Multimodal Sentiment Analysis with Missing Modalities: Yan Zhuang,

Minhao Liu,

Wei Bai,

Yanru Zhang,

Xiaoyue Zhang,

Jiawen Deng,

Fuji Ren; [pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2025_ICCV,
  author    = {Zhuang, Yan and Liu, Minhao and Bai, Wei and Zhang, Yanru and Zhang, Xiaoyue and Deng, Jiawen and Ren, Fuji},
  title     = {{CMAD}: Correlation-Aware and Modalities-Aware Distillation for Multimodal Sentiment Analysis with Missing Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4626--4636},
}
Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent: En Ci,

Shanyan Guan,

Yanhao Ge,

Yilin Zhang,

Wei Li,

Zhenyu Zhang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Ci_2025_ICCV,
  author    = {Ci, En and Guan, Shanyan and Ge, Yanhao and Zhang, Yilin and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying},
  title     = {Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19185--19194},
}
LiT: Delving into a Simple Linear Diffusion Transformer for Image Generation: Jiahao Wang,

Ning Kang,

Lewei Yao,

Mengzhao Chen,

Chengyue Wu,

Songyang Zhang,

Shuchen Xue,

Yong Liu,

Taiqiang Wu,

Xihui Liu,

Kaipeng Zhang,

Shifeng Zhang,

Wenqi Shao,

Zhenguo Li,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiahao and Kang, Ning and Yao, Lewei and Chen, Mengzhao and Wu, Chengyue and Zhang, Songyang and Xue, Shuchen and Liu, Yong and Wu, Taiqiang and Liu, Xihui and Zhang, Kaipeng and Zhang, Shifeng and Shao, Wenqi and Li, Zhenguo and Luo, Ping},
  title     = {{LiT}: Delving into a Simple Linear Diffusion Transformer for Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16068--16078},
}
LeGrad: An Explainability Method for Vision Transformers via Feature Formation Sensitivity: Walid Bousselham,

Angie Boggust,

Sofian Chaybouti,

Hendrik Strobelt,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bousselham_2025_ICCV,
  author    = {Bousselham, Walid and Boggust, Angie and Chaybouti, Sofian and Strobelt, Hendrik and Kuehne, Hilde},
  title     = {{LeGrad}: An Explainability Method for Vision Transformers via Feature Formation Sensitivity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20336--20345},
}
GaussianFlowOcc: Sparse and Weakly Supervised Occupancy Estimation using Gaussian Splatting and Temporal Flow: Simon Boeder,

Fabian Gigengack,

Benjamin Risse; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boeder_2025_ICCV,
  author    = {Boeder, Simon and Gigengack, Fabian and Risse, Benjamin},
  title     = {{GaussianFlowOcc}: Sparse and Weakly Supervised Occupancy Estimation using {Gaussian} Splatting and Temporal Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24943--24954},
}
DreamLayer: Simultaneous Multi-Layer Generation via Diffusion Model: Junjia Huang,

Pengxiang Yan,

Jinhang Cai,

Jiyang Liu,

Zhao Wang,

Yitong Wang,

Xinglong Wu,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junjia and Yan, Pengxiang and Cai, Jinhang and Liu, Jiyang and Wang, Zhao and Wang, Yitong and Wu, Xinglong and Li, Guanbin},
  title     = {{DreamLayer}: Simultaneous Multi-Layer Generation via Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3357--3366},
}
OD-RASE: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving: Kota Shimomura,

Masaki Nambata,

Atsuya Ishikawa,

Ryota Mimura,

Koki Inoue,

Takayoshi Yamashita,

Takayuki Kawabuchi; [pdf] [supp]
[bibtex]
@InProceedings{Shimomura_2025_ICCV,
  author    = {Shimomura, Kota and Nambata, Masaki and Ishikawa, Atsuya and Mimura, Ryota and Inoue, Koki and Yamashita, Takayoshi and Kawabuchi, Takayuki},
  title     = {{OD-RASE}: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26167--26177},
}
FREE-Merging: Fourier Transform for Efficient Model Merging: Shenghe Zheng,

Hongzhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Shenghe and Wang, Hongzhi},
  title     = {{FREE-Merging}: {Fourier} Transform for Efficient Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3863--3873},
}
ForgeLens: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection: Yingjian Chen,

Lei Zhang,

Yakun Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingjian and Zhang, Lei and Niu, Yakun},
  title     = {{ForgeLens}: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16270--16280},
}
Serialization based Point Cloud Oversegmentation: Chenghui Lu,

Jianlong Kwan,

Dilong Li,

Ziyi Chen,

Haiyan Guan; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chenghui and Kwan, Jianlong and Li, Dilong and Chen, Ziyi and Guan, Haiyan},
  title     = {Serialization based Point Cloud Oversegmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25831--25840},
}
Latent Expression Generation for Referring Image Segmentation and Grounding: Seonghoon Yu,

Joonbeom Hong,

Joonseok Lee,

Jeany Son; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Seonghoon and Hong, Joonbeom and Lee, Joonseok and Son, Jeany},
  title     = {Latent Expression Generation for Referring Image Segmentation and Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21374--21383},
}
NoiseController: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration: Haotian Dong,

Xin Wang,

Di Lin,

Yipeng Wu,

Qin Chen,

Ruonan Liu,

Kairui Yang,

Ping Li,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Haotian and Wang, Xin and Lin, Di and Wu, Yipeng and Chen, Qin and Liu, Ruonan and Yang, Kairui and Li, Ping and Guo, Qing},
  title     = {{NoiseController}: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14443--14452},
}
Rethinking Cross-Modal Interaction in Multimodal Diffusion Transformers: Zhengyao Lv,

Tianlin Pan,

Chenyang Si,

Zhaoxi Chen,

Wangmeng Zuo,

Ziwei Liu,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_ICCV,
  author    = {Lv, Zhengyao and Pan, Tianlin and Si, Chenyang and Chen, Zhaoxi and Zuo, Wangmeng and Liu, Ziwei and Wong, Kwan-Yee K.},
  title     = {Rethinking Cross-Modal Interaction in Multimodal Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5934--5943},
}
MOERL: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration: Tao Wang,

Peiwen Xia,

Bo Li,

Peng-Tao Jiang,

Zhe Kong,

Kaihao Zhang,

Tong Lu,

Wenhan Luo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Xia, Peiwen and Li, Bo and Jiang, Peng-Tao and Kong, Zhe and Zhang, Kaihao and Lu, Tong and Luo, Wenhan},
  title     = {{MOERL}: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13673--13683},
}
DACoN: DINO for Anime Paint Bucket Colorization with Any Number of Reference Images: Kazuma Nagata,

Naoshi Kaneko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagata_2025_ICCV,
  author    = {Nagata, Kazuma and Kaneko, Naoshi},
  title     = {{DACoN}: {DINO} for Anime Paint Bucket Colorization with Any Number of Reference Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17899--17908},
}
Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection: Ji Du,

Xin Wang,

Fangwei Hao,

Mingyang Yu,

Chunyuan Chen,

Jiesheng Wu,

Bin Wang,

Jing Xu,

Ping Li; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Ji and Wang, Xin and Hao, Fangwei and Yu, Mingyang and Chen, Chunyuan and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22131--22142},
}
Multi-identity Human Image Animation with Structural Video Diffusion: Zhenzhi Wang,

Yixuan Li,

Yanhong Zeng,

Yuwei Guo,

Dahua Lin,

Tianfan Xue,

Bo Dai; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhenzhi and Li, Yixuan and Zeng, Yanhong and Guo, Yuwei and Lin, Dahua and Xue, Tianfan and Dai, Bo},
  title     = {Multi-identity Human Image Animation with Structural Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11937--11947},
}
UniVG: A Generalist Diffusion Model for Unified Image Generation and Editing: Tsu-Jui Fu,

Yusu Qian,

Chen Chen,

Wenze Hu,

Zhe Gan,

Yinfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tsu-Jui and Qian, Yusu and Chen, Chen and Hu, Wenze and Gan, Zhe and Yang, Yinfei},
  title     = {{UniVG}: A Generalist Diffusion Model for Unified Image Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17160--17170},
}
CLOT: Closed Loop Optimal Transport for Unsupervised Action Segmentation: Elena Bueno-Benito,

Mariella Dimiccoli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bueno-Benito_2025_ICCV,
  author    = {Bueno-Benito, Elena and Dimiccoli, Mariella},
  title     = {{CLOT}: Closed Loop Optimal Transport for Unsupervised Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10719--10729},
}
GeoSplatting: Towards Geometry Guided Gaussian Splatting for Physically-based Inverse Rendering: Kai Ye,

Chong Gao,

Guanbin Li,

Wenzheng Chen,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Kai and Gao, Chong and Li, Guanbin and Chen, Wenzheng and Chen, Baoquan},
  title     = {{GeoSplatting}: Towards Geometry Guided {Gaussian} Splatting for Physically-based Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28991--29000},
}
Temporal-aware Query Routing for Real-time Video Instance Segmentation: Zesen Cheng,

Kehan Li,

Yian Zhao,

Hang Zhang,

Chang Liu,

Jie Chen; [pdf]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zesen and Li, Kehan and Zhao, Yian and Zhang, Hang and Liu, Chang and Chen, Jie},
  title     = {Temporal-aware Query Routing for Real-time Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22467--22476},
}
Accelerating Diffusion Sampling via Exploiting Local Transition Coherence: Shangwen Zhu,

Han Zhang,

Zhantao Yang,

Qianyu Peng,

Zhao Pu,

Huangji Wang,

Fan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Shangwen and Zhang, Han and Yang, Zhantao and Peng, Qianyu and Pu, Zhao and Wang, Huangji and Cheng, Fan},
  title     = {Accelerating Diffusion Sampling via Exploiting Local Transition Coherence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18284--18293},
}
What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models: Lorenzo Baraldi,

Davide Bucciarelli,

Federico Betti,

Marcella Cornia,

Lorenzo Baraldi,

Nicu Sebe,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baraldi_2025_ICCV,
  author    = {Baraldi, Lorenzo and Bucciarelli, Davide and Betti, Federico and Cornia, Marcella and Baraldi, Lorenzo and Sebe, Nicu and Cucchiara, Rita},
  title     = {What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16217--16226},
}
Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control: Seongmin Park,

Hyungmin Kim,

Sangwoo Kim,

Wonseok Jeon,

Juyoung Yang,

Byeongwook Jeon,

Yoonseon Oh,

Jungwook Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Seongmin and Kim, Hyungmin and Kim, Sangwoo and Jeon, Wonseok and Yang, Juyoung and Jeon, Byeongwook and Oh, Yoonseon and Choi, Jungwook},
  title     = {Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13140--13150},
}
Where am I? Cross-View Geo-localization with Natural Language Descriptions: Junyan Ye,

Honglin Lin,

Leyan Ou,

Dairong Chen,

Zihao Wang,

Qi Zhu,

Conghui He,

Weijia Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and Lin, Honglin and Ou, Leyan and Chen, Dairong and Wang, Zihao and Zhu, Qi and He, Conghui and Li, Weijia},
  title     = {Where am {I}? Cross-View Geo-localization with Natural Language Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5890--5900},
}
Boost 3D Reconstruction using Diffusion-based Monocular Camera Calibration: Junyuan Deng,

Wei Yin,

Xiaoyang Guo,

Qian Zhang,

Xiaotao Hu,

Weiqiang Ren,

Xiao-Xiao Long,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Junyuan and Yin, Wei and Guo, Xiaoyang and Zhang, Qian and Hu, Xiaotao and Ren, Weiqiang and Long, Xiao-Xiao and Tan, Ping},
  title     = {Boost {3D} Reconstruction using Diffusion-based Monocular Camera Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7110--7121},
}
GTR: Guided Thought Reinforcement Prevents Thought Collapse in RL-based VLM Agent Training: Tong Wei,

Yijun Yang,

Junliang Xing,

Yuanchun Shi,

Zongqing Lu,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Tong and Yang, Yijun and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng},
  title     = {{GTR}: Guided Thought Reinforcement Prevents Thought Collapse in {RL}-based {VLM} Agent Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18855--18865},
}
Multi-view Gaze Target Estimation: Qiaomu Miao,

Vivek Raju Golani,

Jingyi Xu,

Progga Paromita Dutta,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Qiaomu and Golani, Vivek Raju and Xu, Jingyi and Dutta, Progga Paromita and Hoai, Minh and Samaras, Dimitris},
  title     = {Multi-view Gaze Target Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5371--5381},
}
Semantic Discrepancy-aware Detector for Image Forgery Identification: Ziye Wang,

Minghang Yu,

Chunyan Xu,

Zhen Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziye and Yu, Minghang and Xu, Chunyan and Cui, Zhen},
  title     = {Semantic Discrepancy-aware Detector for Image Forgery Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18388--18398},
}
CapeLLM: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models: Junho Kim,

Hyungjin Chung,

Byung-Hoon Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Chung, Hyungjin and Kim, Byung-Hoon},
  title     = {{CapeLLM}: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22889--22898},
}
VisHall3D: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions: Haoang Lu,

Yuanqi Su,

Xiaoning Zhang,

Longjun Gao,

Yu Xue,

Le Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Haoang and Su, Yuanqi and Zhang, Xiaoning and Gao, Longjun and Xue, Yu and Wang, Le},
  title     = {{VisHall3D}: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28674--28684},
}
Controllable 3D Outdoor Scene Generation via Scene Graphs: Yuheng Liu,

Xinke Li,

Yuning Zhang,

Lu Qi,

Xin Li,

Wenping Wang,

Chongshou Li,

Xueting Li,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuheng and Li, Xinke and Zhang, Yuning and Qi, Lu and Li, Xin and Wang, Wenping and Li, Chongshou and Li, Xueting and Yang, Ming-Hsuan},
  title     = {Controllable {3D} Outdoor Scene Generation via Scene Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28052--28062},
}
JPEG Processing Neural Operator for Backward-Compatible Coding: Woo Kyoung Han,

Yongjun Lee,

Byeonghun Lee,

Sang Hyun Park,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Woo Kyoung and Lee, Yongjun and Lee, Byeonghun and Park, Sang Hyun and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {{JPEG} Processing Neural Operator for Backward-Compatible Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19503--19512},
}
DPoser-X: Diffusion Model as Robust 3D Whole-body Human Pose Prior: Junzhe Lu,

Jing Lin,

Hongkun Dou,

Ailing Zeng,

Yue Deng,

Xian Liu,

Zhongang Cai,

Lei Yang,

Yulun Zhang,

Haoqian Wang,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Junzhe and Lin, Jing and Dou, Hongkun and Zeng, Ailing and Deng, Yue and Liu, Xian and Cai, Zhongang and Yang, Lei and Zhang, Yulun and Wang, Haoqian and Liu, Ziwei},
  title     = {{DPoser-X}: Diffusion Model as Robust {3D} Whole-body Human Pose Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9988--9997},
}
Learning 4D Embodied World Models: Haoyu Zhen,

Qiao Sun,

Hongxin Zhang,

Junyan Li,

Siyuan Zhou,

Yilun Du,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhen_2025_ICCV,
  author    = {Zhen, Haoyu and Sun, Qiao and Zhang, Hongxin and Li, Junyan and Zhou, Siyuan and Du, Yilun and Gan, Chuang},
  title     = {Learning {4D} Embodied World Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5337--5347},
}
FlowTok: Flowing Seamlessly Across Text and Image Tokens: Ju He,

Qihang Yu,

Qihao Liu,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Ju and Yu, Qihang and Liu, Qihao and Chen, Liang-Chieh},
  title     = {{FlowTok}: Flowing Seamlessly Across Text and Image Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16629--16640},
}
PROGRESSOR: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement: Tewodros W. Ayalew,

Xiao Zhang,

Kevin Yuanbo Wu,

Tianchong Jiang,

Michael Maire,

Matthew R. Walter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ayalew_2025_ICCV,
  author    = {Ayalew, Tewodros W. and Zhang, Xiao and Wu, Kevin Yuanbo and Jiang, Tianchong and Maire, Michael and Walter, Matthew R.},
  title     = {{PROGRESSOR}: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10297--10306},
}
UniOcc: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving: Yuping Wang,

Xiangyu Huang,

Xiaokang Sun,

Mingxuan Yan,

Shuo Xing,

Zhengzhong Tu,

Jiachen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuping and Huang, Xiangyu and Sun, Xiaokang and Yan, Mingxuan and Xing, Shuo and Tu, Zhengzhong and Li, Jiachen},
  title     = {{UniOcc}: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25560--25570},
}
AdaHuman: Animatable Detailed 3D Human Generation with Compositional Multiview Diffusion: Yangyi Huang,

Ye Yuan,

Xueting Li,

Jan Kautz,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yangyi and Yuan, Ye and Li, Xueting and Kautz, Jan and Iqbal, Umar},
  title     = {{AdaHuman}: Animatable Detailed {3D} Human Generation with Compositional Multiview Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13533--13543},
}
ReCamMaster: Camera-Controlled Generative Rendering from A Single Video: Jianhong Bai,

Menghan Xia,

Xiao Fu,

Xintao Wang,

Lianrui Mu,

Jinwen Cao,

Zuozhu Liu,

Haoji Hu,

Xiang Bai,

Pengfei Wan,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Jianhong and Xia, Menghan and Fu, Xiao and Wang, Xintao and Mu, Lianrui and Cao, Jinwen and Liu, Zuozhu and Hu, Haoji and Bai, Xiang and Wan, Pengfei and Zhang, Di},
  title     = {{ReCamMaster}: Camera-Controlled Generative Rendering from A Single Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14834--14844},
}
MaskHand: Generative Masked Modeling for Robust Hand Mesh Reconstruction in the Wild: Muhammad Usama Saleem,

Ekkasit Pinyoanuntapong,

Mayur Jagdishbhai Patel,

Hongfei Xue,

Ahmed Helmy,

Srijan Das,

Pu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saleem_2025_ICCV,
  author    = {Saleem, Muhammad Usama and Pinyoanuntapong, Ekkasit and Patel, Mayur Jagdishbhai and Xue, Hongfei and Helmy, Ahmed and Das, Srijan and Wang, Pu},
  title     = {{MaskHand}: Generative Masked Modeling for Robust Hand Mesh Reconstruction in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8372--8383},
}
Image-Guided Shape-from-Template Using Mesh Inextensibility Constraints: Thuy Tran,

Ruochen Chen,

Shaifali Parashar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Thuy and Chen, Ruochen and Parashar, Shaifali},
  title     = {Image-Guided Shape-from-Template Using Mesh Inextensibility Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7419--7428},
}
Confound from All Sides, Distill with Resilience: Multi-Objective Adversarial Paths to Zero-Shot Robustness: Junhao Dong,

Jiao Liu,

Xinghua Qu,

Yew-Soon Ong; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Junhao and Liu, Jiao and Qu, Xinghua and Ong, Yew-Soon},
  title     = {Confound from All Sides, Distill with Resilience: Multi-Objective Adversarial Paths to Zero-Shot Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {624--634},
}
NGD: Neural Gradient Based Deformation for Monocular Garment Reconstruction: Soham Dasgupta,

Shanthika Naik,

Preet Savalia,

Sujay Kumar Ingle,

Avinash Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dasgupta_2025_ICCV,
  author    = {Dasgupta, Soham and Naik, Shanthika and Savalia, Preet and Ingle, Sujay Kumar and Sharma, Avinash},
  title     = {{NGD}: Neural Gradient Based Deformation for Monocular Garment Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25485--25495},
}
Learned Image Compression with Hierarchical Progressive Context Modeling: Yuqi Li,

Haotian Zhang,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuqi and Zhang, Haotian and Li, Li and Liu, Dong},
  title     = {Learned Image Compression with Hierarchical Progressive Context Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18834--18843},
}
U-ViLAR: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration: Xiaofan Li,

Zhihao Xu,

Chenming Wu,

Zhao Yang,

Yumeng Zhang,

Jiang-Jiang Liu,

Haibao Yu,

Xiaoqing Ye,

Yuan Wang,

Shirui Li,

Xun Sun,

Ji Wan,

Jun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaofan and Xu, Zhihao and Wu, Chenming and Yang, Zhao and Zhang, Yumeng and Liu, Jiang-Jiang and Yu, Haibao and Ye, Xiaoqing and Wang, Yuan and Li, Shirui and Sun, Xun and Wan, Ji and Wang, Jun},
  title     = {{U-ViLAR}: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24889--24898},
}
CF3: Compact and Fast 3D Feature Fields: Hyunjoon Lee,

Joonkyu Min,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyunjoon and Min, Joonkyu and Park, Jaesik},
  title     = {{CF3}: Compact and Fast {3D} Feature Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27906--27916},
}
Joint Self-Supervised Video Alignment and Action Segmentation: Ali Shah Ali,

Syed Ahmed Mahmood,

Mubin Saeed,

Andrey Konin,

M. Zeeshan Zia,

Quoc-Huy Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ali_2025_ICCV,
  author    = {Ali, Ali Shah and Mahmood, Syed Ahmed and Saeed, Mubin and Konin, Andrey and Zia, M. Zeeshan and Tran, Quoc-Huy},
  title     = {Joint Self-Supervised Video Alignment and Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10807--10818},
}
2.5 Years in Class: A Multimodal Textbook for Vision-Language Pretraining: Wenqi Zhang,

Hang Zhang,

Xin Li,

Jiashuo Sun,

Yongliang Shen,

Weiming Lu,

Deli Zhao,

Yueting Zhuang,

Lidong Bing; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenqi and Zhang, Hang and Li, Xin and Sun, Jiashuo and Shen, Yongliang and Lu, Weiming and Zhao, Deli and Zhuang, Yueting and Bing, Lidong},
  title     = {2.5 Years in Class: A Multimodal Textbook for Vision-Language Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4647--4658},
}
Learning 3D Scene Analogies with Neural Contextual Scene Maps: Junho Kim,

Gwangtak Bae,

Eun Sun Lee,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Bae, Gwangtak and Lee, Eun Sun and Kim, Young Min},
  title     = {Learning {3D} Scene Analogies with Neural Contextual Scene Maps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7828--7840},
}
Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal LLMs: Yikang Zhou,

Tao Zhang,

Shilin Xu,

Shihao Chen,

Qianyu Zhou,

Yunhai Tong,

Shunping Ji,

Jiangning Zhang,

Lu Qi,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yikang and Zhang, Tao and Xu, Shilin and Chen, Shihao and Zhou, Qianyu and Tong, Yunhai and Ji, Shunping and Zhang, Jiangning and Qi, Lu and Li, Xiangtai},
  title     = {Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17663--17674},
}
FlowSeek: Optical Flow Made Easier with Depth Foundation Models and Motion Bases: Matteo Poggi,

Fabio Tosi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Poggi_2025_ICCV,
  author    = {Poggi, Matteo and Tosi, Fabio},
  title     = {{FlowSeek}: Optical Flow Made Easier with Depth Foundation Models and Motion Bases},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5667--5679},
}
EventUPS: Uncalibrated Photometric Stereo Using an Event Camera: Jinxiu Liang,

Bohan Yu,

Siqi Yang,

Haotian Zhuang,

Jieji Ren,

Peiqi Duan,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Jinxiu and Yu, Bohan and Yang, Siqi and Zhuang, Haotian and Ren, Jieji and Duan, Peiqi and Shi, Boxin},
  title     = {{EventUPS}: Uncalibrated Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7516--7525},
}
WildSAT: Learning Satellite Image Representations from Wildlife Observations: Rangel Daroya,

Elijah Cole,

Oisin Mac Aodha,

Grant Van Horn,

Subhransu Maji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Daroya_2025_ICCV,
  author    = {Daroya, Rangel and Cole, Elijah and Mac Aodha, Oisin and Van Horn, Grant and Maji, Subhransu},
  title     = {{WildSAT}: Learning Satellite Image Representations from Wildlife Observations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6143--6154},
}
Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization: Kyle Sargent,

Kyle Hsu,

Justin Johnson,

Li Fei-Fei,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargent_2025_ICCV,
  author    = {Sargent, Kyle and Hsu, Kyle and Johnson, Justin and Fei-Fei, Li and Wu, Jiajun},
  title     = {Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19471--19481},
}
UniPhys: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control: Yan Wu,

Korrawe Karunratanakul,

Zhengyi Luo,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yan and Karunratanakul, Korrawe and Luo, Zhengyi and Tang, Siyu},
  title     = {{UniPhys}: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13214--13224},
}
SPADE: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning: Xin Hu,

Ke Qin,

Guiduo Duan,

Ming Li,

Yuan-Fang Li,

Tao He; [pdf]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xin and Qin, Ke and Duan, Guiduo and Li, Ming and Li, Yuan-Fang and He, Tao},
  title     = {{SPADE}: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15562--15572},
}
Few-Shot Pattern Detection via Template Matching and Regression: Eunchan Jo,

Dahyun Kang,

Sanghyun Kim,

Yunseon Choi,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Eunchan and Kang, Dahyun and Kim, Sanghyun and Choi, Yunseon and Cho, Minsu},
  title     = {Few-Shot Pattern Detection via Template Matching and Regression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21578--21588},
}
Humans as Checkerboards: Calibrating Camera Motion Scale for World-Coordinate Human Mesh Recovery: Fengyuan Yang,

Kerui Gu,

Ha Linh Nguyen,

Tze Ho Elden Tse,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Fengyuan and Gu, Kerui and Nguyen, Ha Linh and Tse, Tze Ho Elden and Yao, Angela},
  title     = {Humans as Checkerboards: Calibrating Camera Motion Scale for World-Coordinate Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6069--6079},
}
Leveraging BEV Paradigm for Ground-to-Aerial Image Synthesis: Junyan Ye,

Jun He,

Weijia Li,

Zhutao Lv,

Yi Lin,

Jinhua Yu,

Haote Yang,

Conghui He; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and He, Jun and Li, Weijia and Lv, Zhutao and Lin, Yi and Yu, Jinhua and Yang, Haote and He, Conghui},
  title     = {Leveraging {BEV} Paradigm for Ground-to-Aerial Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28451--28461},
}
Deciphering Cross-Modal Alignment in Large Vision-Language Models via Modality Integration Rate: Qidong Huang,

Xiaoyi Dong,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Jiaqi Wang,

Weiming Zhang,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Qidong and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Wang, Jiaqi and Zhang, Weiming and Yu, Nenghai},
  title     = {Deciphering Cross-Modal Alignment in Large Vision-Language Models via Modality Integration Rate},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {218--227},
}
FPEM: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching: Hui Li,

Xiaoyu Ren,

Hongjiu Yu,

Ying Chen,

Kai Li,

L Wang,

Xiongkuo Min,

Huiyu Duan,

Guangtao Zhai,

Xu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hui and Ren, Xiaoyu and Yu, Hongjiu and Chen, Ying and Li, Kai and Wang, L and Min, Xiongkuo and Duan, Huiyu and Zhai, Guangtao and Liu, Xu},
  title     = {{FPEM}: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11458--11468},
}
MultiverSeg: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance: Hallee E. Wong,

Jose Javier Gonzalez Ortiz,

John Guttag,

Adrian V. Dalca; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Hallee E. and Gonzalez Ortiz, Jose Javier and Guttag, John and Dalca, Adrian V.},
  title     = {{MultiverSeg}: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20966--20980},
}
Is Meta-Learning Out? Rethinking Unsupervised Few-Shot Classification with Limited Entropy: Yunchuan Guan,

Yu Liu,

Ke Zhou,

Zhiqi Shen,

Jenq-Neng Hwang,

Serge Belongie,

Lei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yunchuan and Liu, Yu and Zhou, Ke and Shen, Zhiqi and Hwang, Jenq-Neng and Belongie, Serge and Li, Lei},
  title     = {Is Meta-Learning Out? Rethinking Unsupervised Few-Shot Classification with Limited Entropy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4188--4197},
}
Synergistic Prompting for Robust Visual Recognition with Missing Modalities: Zhihui Zhang,

Luanyuan Dai,

Qika Lin,

Yunfeng Diao,

Guangyin Jin,

Yufei Guo,

Jing Zhang,

Xiaoshuai Hao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhihui and Dai, Luanyuan and Lin, Qika and Diao, Yunfeng and Jin, Guangyin and Guo, Yufei and Zhang, Jing and Hao, Xiaoshuai},
  title     = {Synergistic Prompting for Robust Visual Recognition with Missing Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1881--1890},
}
DAMap: Distance-aware MapNet for High Quality HD Map Construction: Jinpeng Dong,

Chen Li,

Yutong Lin,

Jingwen Fu,

Sanping Zhou,

Nanning Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jinpeng and Li, Chen and Lin, Yutong and Fu, Jingwen and Zhou, Sanping and Zheng, Nanning},
  title     = {{DAMap}: Distance-aware {MapNet} for High Quality {HD} Map Construction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5285--5294},
}
Magic Insert: Style-Aware Drag-and-Drop: Nataniel Ruiz,

Yuanzhen Li,

Neal Wadhwa,

Yael Pritch,

Michael Rubinstein,

David E. Jacobs,

Shlomi Fruchter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruiz_2025_ICCV,
  author    = {Ruiz, Nataniel and Li, Yuanzhen and Wadhwa, Neal and Pritch, Yael and Rubinstein, Michael and Jacobs, David E. and Fruchter, Shlomi},
  title     = {{Magic Insert}: Style-Aware Drag-and-Drop},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15971--15981},
}
ProGait: A Multi-Purpose Video Dataset and Benchmark for Transfemoral Prosthesis Users: Xiangyu Yin,

Boyuan Yang,

Weichen Liu,

Qiyao Xue,

Abrar Alamri,

Goeran Fiedler,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Xiangyu and Yang, Boyuan and Liu, Weichen and Xue, Qiyao and Alamri, Abrar and Fiedler, Goeran and Gao, Wei},
  title     = {{ProGait}: A Multi-Purpose Video Dataset and Benchmark for Transfemoral Prosthesis Users},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8984--8993},
}
MuGS: Multi-Baseline Generalizable Gaussian Splatting Reconstruction: Yaopeng Lou,

Liao Shen,

Tianqi Liu,

Jiaqi Li,

Zihao Huang,

Huiqiang Sun,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Yaopeng and Shen, Liao and Liu, Tianqi and Li, Jiaqi and Huang, Zihao and Sun, Huiqiang and Cao, Zhiguo},
  title     = {{MuGS}: Multi-Baseline Generalizable {Gaussian} Splatting Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25583--25593},
}
HADES: Human Avatar with Dynamic Explicit Hair Strands: Zhanfeng Liao,

Hanzhang Tu,

Cheng Peng,

Hongwen Zhang,

Boyao Zhou,

Yebin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zhanfeng and Tu, Hanzhang and Peng, Cheng and Zhang, Hongwen and Zhou, Boyao and Liu, Yebin},
  title     = {{HADES}: Human Avatar with Dynamic Explicit Hair Strands},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12318--12327},
}
Aligning Effective Tokens with Video Anomaly in Large Language Models: Yingxian Chen,

Jiahui Liu,

Ruidi Fan,

Yanwei Li,

Chirui Chang,

Shizhen Zhao,

Wilton W. T. Fok,

Xiaojuan Qi,

Yik-Chung Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingxian and Liu, Jiahui and Fan, Ruidi and Li, Yanwei and Chang, Chirui and Zhao, Shizhen and Fok, Wilton W. T. and Qi, Xiaojuan and Wu, Yik-Chung},
  title     = {Aligning Effective Tokens with Video Anomaly in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22695--22706},
}
PseudoMapTrainer: Learning Online Mapping without HD Maps: Christian Löwens,

Thorben Funke,

Jingchao Xie,

Alexandru Paul Condurache; [pdf] [supp]
[bibtex]
@InProceedings{Lowens_2025_ICCV,
  author    = {L{\"o}wens, Christian and Funke, Thorben and Xie, Jingchao and Condurache, Alexandru Paul},
  title     = {{PseudoMapTrainer}: Learning Online Mapping without {HD} Maps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5263--5272},
}
From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning: Le Zhuo,

Liangbing Zhao,

Sayak Paul,

Yue Liao,

Renrui Zhang,

Yi Xin,

Peng Gao,

Mohamed Elhoseiny,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Le and Zhao, Liangbing and Paul, Sayak and Liao, Yue and Zhang, Renrui and Xin, Yi and Gao, Peng and Elhoseiny, Mohamed and Li, Hongsheng},
  title     = {From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15329--15339},
}
FreeDance: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework: Yiwen Zhao,

Yang Wang,

Liting Wen,

Hengyuan Zhang,

Xingqun Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yiwen and Wang, Yang and Wen, Liting and Zhang, Hengyuan and Qi, Xingqun},
  title     = {{FreeDance}: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10560--10569},
}
Beyond the Frame: Generating 360° Panoramic Videos from Perspective Videos: Rundong Luo,

Matthew Wallingford,

Ali Fahardi,

Noah Snavely,

Wei-Chiu Ma; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Rundong and Wallingford, Matthew and Fahardi, Ali and Snavely, Noah and Ma, Wei-Chiu},
  title     = {Beyond the Frame: Generating {$360^{\circ}$} Panoramic Videos from Perspective Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14336--14345},
}
Multi-turn Consistent Image Editing: Zijun Zhou,

Yingying Deng,

Xiangyu He,

Weiming Dong,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zijun and Deng, Yingying and He, Xiangyu and Dong, Weiming and Tang, Fan},
  title     = {Multi-turn Consistent Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15792--15801},
}
HUMOTO: A 4D Dataset of Mocap Human Object Interactions: Jiaxin Lu,

Chun-Hao Paul Huang,

Uttaran Bhattacharya,

Qixing Huang,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiaxin and Huang, Chun-Hao Paul and Bhattacharya, Uttaran and Huang, Qixing and Zhou, Yi},
  title     = {{HUMOTO}: A {4D} Dataset of Mocap Human Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10886--10897},
}
Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval: Ziwei Wang,

Sameera Ramasinghe,

Chenchen Xu,

Julien Monteil,

Loris Bazzani,

Thalaiyasingam Ajanthan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziwei and Ramasinghe, Sameera and Xu, Chenchen and Monteil, Julien and Bazzani, Loris and Ajanthan, Thalaiyasingam},
  title     = {Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9924--9934},
}
AutoPrompt: Automated Red-Teaming of Text-to-Image Models via LLM-Driven Adversarial Prompts: Yufan Liu,

Wanqian Zhang,

Huashan Chen,

Lin Wang,

Xiaojun Jia,

Zheng Lin,

Weiping Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yufan and Zhang, Wanqian and Chen, Huashan and Wang, Lin and Jia, Xiaojun and Lin, Zheng and Wang, Weiping},
  title     = {{AutoPrompt}: Automated Red-Teaming of Text-to-Image Models via {LLM}-Driven Adversarial Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17557--17566},
}
LLM-assisted Entropy-based Adaptive Distillation for Unsupervised Fine-grained Visual Representation Learning: Jianfeng Dong,

Danfeng Luo,

Daizong Liu,

Jie Sun,

Xiaoye Qu,

Xun Yang,

Dongsheng Liu,

Xun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jianfeng and Luo, Danfeng and Liu, Daizong and Sun, Jie and Qu, Xiaoye and Yang, Xun and Liu, Dongsheng and Wang, Xun},
  title     = {{LLM}-assisted Entropy-based Adaptive Distillation for Unsupervised Fine-grained Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {383--392},
}
Jailbreaking Multimodal Large Language Models via Shuffle Inconsistency: Shiji Zhao,

Ranjie Duan,

Fengxiang Wang,

Chi Chen,

Caixin Kang,

Shouwei Ruan,

Jialing Tao,

YueFeng Chen,

Hui Xue,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Shiji and Duan, Ranjie and Wang, Fengxiang and Chen, Chi and Kang, Caixin and Ruan, Shouwei and Tao, Jialing and Chen, YueFeng and Xue, Hui and Wei, Xingxing},
  title     = {Jailbreaking Multimodal Large Language Models via Shuffle Inconsistency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2045--2054},
}
UMDATrack: Unified Multi-Domain Adaptive Tracking Under Adverse Weather Conditions: Siyuan Yao,

Rui Zhu,

Ziqi Wang,

Wenqi Ren,

Yanyang Yan,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Siyuan and Zhu, Rui and Wang, Ziqi and Ren, Wenqi and Yan, Yanyang and Cao, Xiaochun},
  title     = {{UMDATrack}: Unified Multi-Domain Adaptive Tracking Under Adverse Weather Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6466--6475},
}
Boosting MLLM Reasoning with Text-Debiased Hint-GRPO: Qihan Huang,

Weilong Dai,

Jinlong Liu,

Wanggui He,

Hao Jiang,

Mingli Song,

Jingyuan Chen,

Chang Yao,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Qihan and Dai, Weilong and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Chen, Jingyuan and Yao, Chang and Song, Jie},
  title     = {Boosting {MLLM} Reasoning with Text-Debiased {Hint-GRPO}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4848--4857},
}
Learning on the Go: A Meta-learning Object Navigation Model: Xiaorong Qin,

Xinhang Song,

Sixian Zhang,

Xinyao Yu,

Xinmiao Zhang,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Xiaorong and Song, Xinhang and Zhang, Sixian and Yu, Xinyao and Zhang, Xinmiao and Jiang, Shuqiang},
  title     = {Learning on the Go: A Meta-learning Object Navigation Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8939--8949},
}
Visual Interestingness Decoded: How GPT-4o Mirrors Human Interests: Fitim Abdullahu,

Helmut Grabner; [pdf] [supp]
[bibtex]
@InProceedings{Abdullahu_2025_ICCV,
  author    = {Abdullahu, Fitim and Grabner, Helmut},
  title     = {Visual Interestingness Decoded: How {GPT-4o} Mirrors Human Interests},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15350--15364},
}
PriOr-Flow: Enhancing Primitive Panoramic Optical Flow with Orthogonal View: Longliang Liu,

Miaojie Feng,

Junda Cheng,

Jijun Xiang,

Xuan Zhu,

Xin Yang; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Longliang and Feng, Miaojie and Cheng, Junda and Xiang, Jijun and Zhu, Xuan and Yang, Xin},
  title     = {{PriOr-Flow}: Enhancing Primitive Panoramic Optical Flow with Orthogonal View},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5326--5336},
}
A Simple yet Mighty Hartley Diffusion Versatilist for Generalizable Dense Vision Tasks: Qi Bi,

Jingjun Yi,

Huimin Huang,

Hao Zheng,

Haolan Zhan,

Wei Ji,

Yawen Huang,

Yuexiang Li,

Yefeng Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Bi_2025_ICCV,
  author    = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Ji, Wei and Huang, Yawen and Li, Yuexiang and Zheng, Yefeng},
  title     = {A Simple yet Mighty {Hartley} Diffusion Versatilist for Generalizable Dense Vision Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6748--6760},
}
Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring: Yufei Zhu,

Hao Chen,

Yongjian Deng,

Wei You; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yufei and Chen, Hao and Deng, Yongjian and You, Wei},
  title     = {Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14732--14742},
}
Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video: Xiao Li,

Qi Chen,

Xiulian Peng,

Kai Yu,

Xie Chen,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiao and Chen, Qi and Peng, Xiulian and Yu, Kai and Chen, Xie and Lu, Yan},
  title     = {Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12904--12914},
}
LGA-Net: Learning Local and Global Affinities for Sparse Scribble based Image Colorization: Hongjin Lyu,

Bo Li,

Paul L. Rosin,

Yu-Kun Lai; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Hongjin and Li, Bo and Rosin, Paul L. and Lai, Yu-Kun},
  title     = {{LGA-Net}: Learning Local and Global Affinities for Sparse Scribble based Image Colorization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8144--8153},
}
Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis: Inseung Hwang,

Kiseok Choi,

Hyunho Ha,

Min H. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inseung and Choi, Kiseok and Ha, Hyunho and Kim, Min H.},
  title     = {Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24899--24909},
}
JointDiT: Enhancing RGB-Depth Joint Modeling with Diffusion Transformers: Kwon Byung-Ki,

Qi Dai,

Lee Hyoseok,

Chong Luo,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Byung-Ki_2025_ICCV,
  author    = {Kwon, Byung-Ki and Dai, Qi and Lee, Hyoseok and Luo, Chong and Oh, Tae-Hyun},
  title     = {{JointDiT}: Enhancing {RGB}-Depth Joint Modeling with Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25261--25271},
}
Discretized Gaussian Representation for Tomographic Reconstruction: Shaokai Wu,

Yuxiang Lu,

Yapan Guo,

Wei Ji,

Suizhi Huang,

Fengyu Yang,

Shalayiding Sirejiding,

Qichen He,

Jing Tong,

Yanbiao Ji,

Yue Ding,

Hongtao Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaokai and Lu, Yuxiang and Guo, Yapan and Ji, Wei and Huang, Suizhi and Yang, Fengyu and Sirejiding, Shalayiding and He, Qichen and Tong, Jing and Ji, Yanbiao and Ding, Yue and Lu, Hongtao},
  title     = {Discretized {Gaussian} Representation for Tomographic Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25073--25082},
}
ScoreHOI: Physically Plausible Reconstruction of Human-Object Interaction via Score-Guided Diffusion: Ao Li,

Jinpeng Liu,

Yixuan Zhu,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ao and Liu, Jinpeng and Zhu, Yixuan and Tang, Yansong},
  title     = {{ScoreHOI}: Physically Plausible Reconstruction of Human-Object Interaction via Score-Guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7592--7602},
}
CMB-ML: A Cosmic Microwave Background Dataset for the Oldest Possible Computer Vision Task: James Amato,

Yunan Xie,

Leonel Medina-Varela,

Ammar Aljerwi,

Adam McCutcheon,

T. Seth Rippentrop,

Kristian Gonzalez,

Jacques Delabrouille,

Mustapha Ishak,

Nicholas Ruozzi; [pdf] [supp]
[bibtex]
@InProceedings{Amato_2025_ICCV,
  author    = {Amato, James and Xie, Yunan and Medina-Varela, Leonel and Aljerwi, Ammar and McCutcheon, Adam and Rippentrop, T. Seth and Gonzalez, Kristian and Delabrouille, Jacques and Ishak, Mustapha and Ruozzi, Nicholas},
  title     = {{CMB-ML}: A Cosmic Microwave Background Dataset for the Oldest Possible Computer Vision Task},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9418--9430},
}
Deeply Supervised Flow-Based Generative Models: Inkyu Shin,

Chenglin Yang,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Inkyu and Yang, Chenglin and Chen, Liang-Chieh},
  title     = {Deeply Supervised Flow-Based Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16535--16544},
}
Towards Efficient General Feature Prediction in Masked Skeleton Modeling: Shengkai Sun,

Zefan Zhang,

Jianfeng Dong,

Zhiyong Cheng,

Xiaojun Chang,

Meng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shengkai and Zhang, Zefan and Dong, Jianfeng and Cheng, Zhiyong and Chang, Xiaojun and Wang, Meng},
  title     = {Towards Efficient General Feature Prediction in Masked Skeleton Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12212--12221},
}
Dynamic Point Maps: A Versatile Representation for Dynamic 3D Reconstruction: Edgar Sucar,

Zihang Lai,

Eldar Insafutdinov,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sucar_2025_ICCV,
  author    = {Sucar, Edgar and Lai, Zihang and Insafutdinov, Eldar and Vedaldi, Andrea},
  title     = {Dynamic Point Maps: A Versatile Representation for Dynamic {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7295--7305},
}
Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking: Guangyao Li,

Siping Zhuang,

Yajun Jian,

Yan Yan,

Hanzi Wang; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Guangyao and Zhuang, Siping and Jian, Yajun and Yan, Yan and Wang, Hanzi},
  title     = {Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23626--23635},
}
Timestep-Aware Diffusion Model for Extreme Image Rescaling: Ce Wang,

Zhenyu Hu,

Wanjie Sun,

Zhenzhong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ce and Hu, Zhenyu and Sun, Wanjie and Chen, Zhenzhong},
  title     = {Timestep-Aware Diffusion Model for Extreme Image Rescaling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15594--15603},
}
COVTrack: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion: Zekun Qian,

Ruize Han,

Zhixiang Wang,

Junhui Hou,

Wei Feng; [pdf]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Hou, Junhui and Feng, Wei},
  title     = {{COVTrack}: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10054--10063},
}
Toward Material-Agnostic System Identification from Videos: Yizhou Zhao,

Haoyu Chen,

Chunjiang Liu,

Zhenyang Li,

Charles Herrmann,

Junhwa Hur,

Yinxiao Li,

Ming-Hsuan Yang,

Bhiksha Raj,

Min Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yizhou and Chen, Haoyu and Liu, Chunjiang and Li, Zhenyang and Herrmann, Charles and Hur, Junhwa and Li, Yinxiao and Yang, Ming-Hsuan and Raj, Bhiksha and Xu, Min},
  title     = {Toward Material-Agnostic System Identification from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5944--5956},
}
Referring Expression Comprehension for Small Objects: Kanoko Goto,

Takumi Hirose,

Mahiro Ukai,

Shuhei Kurita,

Nakamasa Inoue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goto_2025_ICCV,
  author    = {Goto, Kanoko and Hirose, Takumi and Ukai, Mahiro and Kurita, Shuhei and Inoue, Nakamasa},
  title     = {Referring Expression Comprehension for Small Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21231--21242},
}
Robust 3D-Masked Part-level Editing in 3D Gaussian Splatting with Regularized Score Distillation Sampling: Hayeon Kim,

Ji Ha Jang,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hayeon and Jang, Ji Ha and Chun, Se Young},
  title     = {Robust {3D}-Masked Part-level Editing in {3D} {Gaussian} Splatting with Regularized Score Distillation Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5501--5510},
}
InstaDrive: Instance-Aware Driving World Models for Realistic and Consistent Video Generation: Zhuoran Yang,

Xi Guo,

Chenjing Ding,

Chiyu Wang,

Wei Wu,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhuoran and Guo, Xi and Ding, Chenjing and Wang, Chiyu and Wu, Wei and Zhang, Yanyong},
  title     = {{InstaDrive}: Instance-Aware Driving World Models for Realistic and Consistent Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25410--25420},
}
Effective Training Data Synthesis for Improving MLLM Chart Understanding: Yuwei Yang,

Zeyu Zhang,

Yunzhong Hou,

Zhuowan Li,

Gaowen Liu,

Ali Payani,

Yuan-Sen Ting,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuwei and Zhang, Zeyu and Hou, Yunzhong and Li, Zhuowan and Liu, Gaowen and Payani, Ali and Ting, Yuan-Sen and Zheng, Liang},
  title     = {Effective Training Data Synthesis for Improving {MLLM} Chart Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2653--2663},
}
DIVE: Taming DINO for Subject-Driven Video Editing: Yi Huang,

Wei Xiong,

He Zhang,

Chaoqi Chen,

Jianzhuang Liu,

Mingfu Yan,

Shifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yi and Xiong, Wei and Zhang, He and Chen, Chaoqi and Liu, Jianzhuang and Yan, Mingfu and Chen, Shifeng},
  title     = {{DIVE}: Taming {DINO} for Subject-Driven Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16004--16014},
}
Neural Architecture Search Driven by Locally Guided Diffusion for Personalized Federated Learning: Peng Liao,

Xilu Wang,

Yaochu Jin,

Wenli Du,

Han Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Peng and Wang, Xilu and Jin, Yaochu and Du, Wenli and Hu, Han},
  title     = {Neural Architecture Search Driven by Locally Guided Diffusion for Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4222--4231},
}
Global-Aware Monocular Semantic Scene Completion with State Space Models: Shijie Li,

Zhongyao Cheng,

Rong Li,

Shuai Li,

Juergen Gall,

Xun Xu,

Xulei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shijie and Cheng, Zhongyao and Li, Rong and Li, Shuai and Gall, Juergen and Xu, Xun and Yang, Xulei},
  title     = {Global-Aware Monocular Semantic Scene Completion with State Space Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25550--25559},
}
ULTHO: Ultra-Lightweight yet Efficient Hyperparameter Optimization in Deep Reinforcement Learning: Mingqi Yuan,

Bo Li,

Xin Jin,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Mingqi and Li, Bo and Jin, Xin and Zeng, Wenjun},
  title     = {{ULTHO}: Ultra-Lightweight yet Efficient Hyperparameter Optimization in Deep Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2620--2630},
}
GReg: Geometry-Aware Region Refinement for Sign Language Video Generation: Tongkai Shi,

Lianyu Hu,

Fanhua Shang,

Liqing Gao,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Tongkai and Hu, Lianyu and Shang, Fanhua and Gao, Liqing and Feng, Wei},
  title     = {{GReg}: Geometry-Aware Region Refinement for Sign Language Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16472--16481},
}
CarGait: Cross-Attention based Re-ranking for Gait recognition: Gavriel Habib,

Noa Barzilay,

Or Shimshi,

Rami Ben-Ari,

Nir Darshan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Habib_2025_ICCV,
  author    = {Habib, Gavriel and Barzilay, Noa and Shimshi, Or and Ben-Ari, Rami and Darshan, Nir},
  title     = {{CarGait}: Cross-Attention based Re-ranking for Gait recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11884--11894},
}
Text2VDM: Text to Vector Displacement Maps for Expressive and Interactive 3D Sculpting: Hengyu Meng,

Duotun Wang,

Zhijing Shao,

Ligang Liu,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Hengyu and Wang, Duotun and Shao, Zhijing and Liu, Ligang and Wang, Zeyu},
  title     = {{Text2VDM}: Text to Vector Displacement Maps for Expressive and Interactive {3D} Sculpting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16882--16892},
}
DiffVSR: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations: Xiaohui Li,

Yihao Liu,

Shuo Cao,

Ziyan Chen,

Shaobin Zhuang,

Xiangyu Chen,

Yinan He,

Yi Wang,

Yu Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaohui and Liu, Yihao and Cao, Shuo and Chen, Ziyan and Zhuang, Shaobin and Chen, Xiangyu and He, Yinan and Wang, Yi and Qiao, Yu},
  title     = {{DiffVSR}: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15319--15328},
}
3D Gaussian Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation: Tianrui Lou,

Xiaojun Jia,

Siyuan Liang,

Jiawei Liang,

Ming Zhang,

Yanjun Xiao,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Tianrui and Jia, Xiaojun and Liang, Siyuan and Liang, Jiawei and Zhang, Ming and Xiao, Yanjun and Cao, Xiaochun},
  title     = {{3D} {Gaussian} Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28752--28762},
}
GenieBlue: Integrating both Linguistic and Multimodal Capabilities for Large Language Models on Mobile Devices: Xudong Lu,

Yinghao Chen,

Renshou Wu,

Haohao Gao,

Xi Chen,

Xue Yang,

Xiangyu Zhao,

Aojun Zhou,

Fangyuan Li,

Yafei Wen,

Xiaoxin Chen,

Shuai Ren,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Xudong and Chen, Yinghao and Wu, Renshou and Gao, Haohao and Chen, Xi and Yang, Xue and Zhao, Xiangyu and Zhou, Aojun and Li, Fangyuan and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {{GenieBlue}: Integrating both Linguistic and Multimodal Capabilities for Large Language Models on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4198--4210},
}
MambaML: Exploring State Space Models for Multi-Label Image Classification: Xuelin Zhu,

Jian Liu,

Jiuxin Cao,

Bing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Xuelin and Liu, Jian and Cao, Jiuxin and Wang, Bing},
  title     = {{MambaML}: Exploring State Space Models for Multi-Label Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4743--4753},
}
SuperMat: Physically Consistent PBR Material Estimation at Interactive Rates: Yijia Hong,

Yuan-Chen Guo,

Ran Yi,

Yulong Chen,

Yan-Pei Cao,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Yijia and Guo, Yuan-Chen and Yi, Ran and Chen, Yulong and Cao, Yan-Pei and Ma, Lizhuang},
  title     = {{SuperMat}: Physically Consistent {PBR} Material Estimation at Interactive Rates},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25083--25093},
}
Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening: Hebaixu Wang,

Jiayi Ma; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hebaixu and Ma, Jiayi},
  title     = {Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10730--10740},
}
HiMTok: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model: Tao Wang,

Changxu Cheng,

Lingfeng Wang,

Senda Chen,

Wuyue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Cheng, Changxu and Wang, Lingfeng and Chen, Senda and Zhao, Wuyue},
  title     = {{HiMTok}: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23267--23278},
}
MixA-Q: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective: Weitian Wang,

Rai Shubham,

Cecilia De La Parra,

Akash Kumar; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weitian and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash},
  title     = {{MixA-Q}: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22143--22152},
}
A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition: Jie Zhu,

Yiyang Su,

Minchul Kim,

Anil Jain,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jie and Su, Yiyang and Kim, Minchul and Jain, Anil and Liu, Xiaoming},
  title     = {A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13076--13086},
}
EMD: Explicit Motion Modeling for High-Quality Street Gaussian Splatting: Xiaobao Wei,

Qingpo Wuwu,

Zhongyu Zhao,

Zhuangzhe Wu,

Nan Huang,

Ming Lu,

Ningning Ma,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Wuwu, Qingpo and Zhao, Zhongyu and Wu, Zhuangzhe and Huang, Nan and Lu, Ming and Ma, Ningning and Zhang, Shanghang},
  title     = {{EMD}: Explicit Motion Modeling for High-Quality Street {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28462--28472},
}
MOVE: Motion-Guided Few-Shot Video Object Segmentation: Kaining Ying,

Hengrui Hu,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Kaining and Hu, Hengrui and Ding, Henghui},
  title     = {{MOVE}: Motion-Guided Few-Shot Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11632--11642},
}
Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues: Xu Cao,

Takafumi Taketomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xu and Taketomi, Takafumi},
  title     = {Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27552--27562},
}
CO2-Net: A Physics-Informed Spatio-Temporal Model for Global Surface CO2 Reconstruction: Hao Zheng,

Yuting Zheng,

Hanbo Huang,

Chaofan Sun,

Enhui Liao,

Lin Liu,

Yi Han,

Hao Zhou,

Shiyu Liang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Hao and Zheng, Yuting and Huang, Hanbo and Sun, Chaofan and Liao, Enhui and Liu, Lin and Han, Yi and Zhou, Hao and Liang, Shiyu},
  title     = {{CO2-Net}: A Physics-Informed Spatio-Temporal Model for Global Surface {CO2} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6220--6230},
}
Multi-Modal Multi-Task Unified Embedding Model (M3T-UEM): A Task-Adaptive Representation Learning Framework: Rohan Sharma,

Changyou Chen,

Feng-Ju Chang,

Seongjun Yun,

Xiaohu Xie,

Rui Meng,

Dehong Xu,

Alejandro Mottini,

Qingjun Cui; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2025_ICCV,
  author    = {Sharma, Rohan and Chen, Changyou and Chang, Feng-Ju and Yun, Seongjun and Xie, Xiaohu and Meng, Rui and Xu, Dehong and Mottini, Alejandro and Cui, Qingjun},
  title     = {Multi-Modal Multi-Task Unified Embedding Model ({M3T-UEM}): A Task-Adaptive Representation Learning Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22783--22793},
}
Randomized Autoregressive Visual Generation: Qihang Yu,

Ju He,

Xueqing Deng,

Xiaohui Shen,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Qihang and He, Ju and Deng, Xueqing and Shen, Xiaohui and Chen, Liang-Chieh},
  title     = {Randomized Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18431--18441},
}
Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views: Xiangdong Zhang,

Shaofeng Zhang,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiangdong and Zhang, Shaofeng and Yan, Junchi},
  title     = {Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28696--28706},
}
Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation and Reconstruction: Yuanhao Cai,

He Zhang,

Kai Zhang,

Yixun Liang,

Mengwei Ren,

Fujun Luan,

Qing Liu,

Soo Ye Kim,

Jianming Zhang,

Zhifei Zhang,

Yuqian Zhou,

Yulun Zhang,

Xiaokang Yang,

Zhe Lin,

Alan Yuille; [pdf] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yuanhao and Zhang, He and Zhang, Kai and Liang, Yixun and Ren, Mengwei and Luan, Fujun and Liu, Qing and Kim, Soo Ye and Zhang, Jianming and Zhang, Zhifei and Zhou, Yuqian and Zhang, Yulun and Yang, Xiaokang and Lin, Zhe and Yuille, Alan},
  title     = {Baking {Gaussian} Splatting into Diffusion Denoiser for Fast and Scalable Single-stage {Image-to-3D} Generation and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25062--25072},
}
Variance-Based Pruning for Accelerating and Compressing Trained Networks: Uranik Berisha,

Jens Mehnert,

Alexandru Paul Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berisha_2025_ICCV,
  author    = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul},
  title     = {Variance-Based Pruning for Accelerating and Compressing Trained Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4973--4982},
}
Learning Normal Flow Directly From Events: Dehao Yuan,

Levi Burner,

Jiayi Wu,

Minghui Liu,

Jingxi Chen,

Yiannis Aloimonos,

Cornelia Fermüller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Dehao and Burner, Levi and Wu, Jiayi and Liu, Minghui and Chen, Jingxi and Aloimonos, Yiannis and Ferm{\"u}ller, Cornelia},
  title     = {Learning Normal Flow Directly From Events},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7969--7979},
}
Federated Prompt-Tuning with Heterogeneous and Incomplete Multimodal Client Data: Thu Hang Phung,

Duong M. Nguyen,

Thanh Trung Huynh,

Quoc Viet Hung Nguyen,

Trong Nghia Hoang,

Phi Le Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Phung_2025_ICCV,
  author    = {Phung, Thu Hang and Nguyen, Duong M. and Huynh, Thanh Trung and Nguyen, Quoc Viet Hung and Hoang, Trong Nghia and Le Nguyen, Phi},
  title     = {Federated Prompt-Tuning with Heterogeneous and Incomplete Multimodal Client Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3936--3946},
}
Trace3D: Consistent Segmentation Lifting via Gaussian Instance Tracing: Hongyu Shen,

Junfeng Ni,

Yixin Chen,

Weishuo Li,

Mingtao Pei,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Hongyu and Ni, Junfeng and Chen, Yixin and Li, Weishuo and Pei, Mingtao and Huang, Siyuan},
  title     = {{Trace3D}: Consistent Segmentation Lifting via {Gaussian} Instance Tracing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6656--6666},
}
LMM-Det: Make Large Multimodal Models Excel in Object Detection: Jincheng Li,

Chunyu Xie,

Ji Ao,

Dawei Leng,

Yuhui Yin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jincheng and Xie, Chunyu and Ao, Ji and Leng, Dawei and Yin, Yuhui},
  title     = {{LMM-Det}: Make Large Multimodal Models Excel in Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {308--318},
}
Bootstrap3D: Improving Multi-view Diffusion Model with Synthetic Data: Zeyi Sun,

Tong Wu,

Pan Zhang,

Yuhang Zang,

Xiaoyi Dong,

Yuanjun Xiong,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zeyi and Wu, Tong and Zhang, Pan and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{Bootstrap3D}: Improving Multi-view Diffusion Model with Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15714--15726},
}
Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching: Giacomo Meanti,

Thomas Ryckeboer,

Michael Arbel,

Julien Mairal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meanti_2025_ICCV,
  author    = {Meanti, Giacomo and Ryckeboer, Thomas and Arbel, Michael and Mairal, Julien},
  title     = {Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28364--28374},
}
Towards Adversarial Robustness via Debiased High-Confidence Logit Alignment: Kejia Zhang,

Juanjuan Weng,

Shaozi Li,

Zhiming Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kejia and Weng, Juanjuan and Li, Shaozi and Luo, Zhiming},
  title     = {Towards Adversarial Robustness via Debiased High-Confidence Logit Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2783--2792},
}
FiffDepth: Feed-forward Transformation of Diffusion-Based Generators for Detailed Depth Estimation: Yunpeng Bai,

Qixing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Yunpeng and Huang, Qixing},
  title     = {{FiffDepth}: Feed-forward Transformation of Diffusion-Based Generators for Detailed Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6023--6033},
}
Reinforcement Learning-Guided Data Selection via Redundancy Assessment: Suorong Yang,

Peijia Li,

Furao Shen,

Jian Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Suorong and Li, Peijia and Shen, Furao and Zhao, Jian},
  title     = {Reinforcement Learning-Guided Data Selection via Redundancy Assessment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1004--1015},
}
Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines: Jiayuan Chen,

Thai-Hoang Pham,

Yuanlong Wang,

Ping Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiayuan and Pham, Thai-Hoang and Wang, Yuanlong and Zhang, Ping},
  title     = {Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22846--22856},
}
OuroMamba: A Data-Free Quantization Framework for Vision Mamba: Akshat Ramachandran,

Mingyu Lee,

Huan Xu,

Souvik Kundu,

Tushar Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramachandran_2025_ICCV,
  author    = {Ramachandran, Akshat and Lee, Mingyu and Xu, Huan and Kundu, Souvik and Krishna, Tushar},
  title     = {{OuroMamba}: A Data-Free Quantization Framework for Vision {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21177--21186},
}
Optical Model-Driven Sharpness Mapping for Autofocus in Small Depth-of-Field and Severe Defocus Scenarios: Chen-Liang Fan,

Mingpei Cao,

Chih Chien Hung,

Yuesheng Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Chen-Liang and Cao, Mingpei and Hung, Chih Chien and Zhu, Yuesheng},
  title     = {Optical Model-Driven Sharpness Mapping for Autofocus in Small Depth-of-Field and Severe Defocus Scenarios},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6426--6435},
}
DICE: Staleness-Centric Optimizations for Parallel Diffusion MoE Inference: Jiajun Luo,

Lizhuo Luo,

Jianru Xu,

Jiajun Song,

Rongwei Lu,

Chen Tang,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Jiajun and Luo, Lizhuo and Xu, Jianru and Song, Jiajun and Lu, Rongwei and Tang, Chen and Wang, Zhi},
  title     = {{DICE}: Staleness-Centric Optimizations for Parallel Diffusion {MoE} Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15481--15490},
}
Safeguarding Vision-Language Models: Mitigating Vulnerabilities to Gaussian Noise in Perturbation-based Attacks: Jiawei Wang,

Yushen Zuo,

Yuanjun Chai,

Zhendong Liu,

Yicheng Fu,

Yichun Feng,

Kin-Man Lam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiawei and Zuo, Yushen and Chai, Yuanjun and Liu, Zhendong and Fu, Yicheng and Feng, Yichun and Lam, Kin-Man},
  title     = {Safeguarding Vision-Language Models: Mitigating Vulnerabilities to {Gaussian} Noise in Perturbation-based Attacks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2773--2782},
}
SHeaP: Self-Supervised Head Geometry Predictor Learned via 2D Gaussians: Liam Schoneveld,

Zhe Chen,

Davide Davoli,

Jiapeng Tang,

Saimon Terazawa,

Ko Nishino,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schoneveld_2025_ICCV,
  author    = {Schoneveld, Liam and Chen, Zhe and Davoli, Davide and Tang, Jiapeng and Terazawa, Saimon and Nishino, Ko and Nie{\ss}ner, Matthias},
  title     = {{SHeaP}: Self-Supervised Head Geometry Predictor Learned via {2D} {Gaussians}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14162--14172},
}
Pretrained Reversible Generation as Unsupervised Visual Representation Learning: Rongkun Xue,

Jinouwen Zhang,

Yazhe Niu,

Dazhong Shen,

Bingqi Ma,

Yu Liu,

Jing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Rongkun and Zhang, Jinouwen and Niu, Yazhe and Shen, Dazhong and Ma, Bingqi and Liu, Yu and Yang, Jing},
  title     = {Pretrained Reversible Generation as Unsupervised Visual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19216--19226},
}
CompSlider: Compositional Slider for Disentangled Multiple-Attribute Image Generation: Zixin Zhu,

Kevin Duarte,

Mamshad Nayeem Rizve,

Chengyuan Xu,

Ratheesh Kalarot,

Junsong Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Zixin and Duarte, Kevin and Rizve, Mamshad Nayeem and Xu, Chengyuan and Kalarot, Ratheesh and Yuan, Junsong},
  title     = {{CompSlider}: Compositional Slider for Disentangled Multiple-Attribute Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16682--16691},
}
MA-CIR: A Multimodal Arithmetic Benchmark for Composed Image Retrieval: Jaeseok Byun,

Young Kyun Jang,

Seokhyeon Jeong,

Donghyun Kim,

Taesup Moon; [pdf] [supp]
[bibtex]
@InProceedings{Byun_2025_ICCV,
  author    = {Byun, Jaeseok and Jang, Young Kyun and Jeong, Seokhyeon and Kim, Donghyun and Moon, Taesup},
  title     = {{MA-CIR}: A Multimodal Arithmetic Benchmark for Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21342--21352},
}
NAPPure: Adversarial Purification for Robust Image Classification under Non-Additive Perturbations: Junjie Nan,

Jianing Li,

Wei Chen,

Mingkun Zhang,

Xueqi Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Nan_2025_ICCV,
  author    = {Nan, Junjie and Li, Jianing and Chen, Wei and Zhang, Mingkun and Cheng, Xueqi},
  title     = {{NAPPure}: Adversarial Purification for Robust Image Classification under Non-Additive Perturbations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2260--2269},
}
Large-scale Pre-training for Grounded Video Caption Generation: Evangelos Kazakos,

Cordelia Schmid,

Josef Sivic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kazakos_2025_ICCV,
  author    = {Kazakos, Evangelos and Schmid, Cordelia and Sivic, Josef},
  title     = {Large-scale Pre-training for Grounded Video Caption Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24434--24444},
}
MMReason: An Open-Ended Multi-Modal Multi-Step Reasoning Benchmark for MLLMs Toward AGI: Huanjin Yao,

Jiaxing Huang,

Yawen Qiu,

Michael K. Chen,

Wenzheng Liu,

Wei Zhang,

Wenjie Zeng,

Xikun Zhang,

Jingyi Zhang,

YuXin Song,

Wenhao Wu,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Huanjin and Huang, Jiaxing and Qiu, Yawen and Chen, Michael K. and Liu, Wenzheng and Zhang, Wei and Zeng, Wenjie and Zhang, Xikun and Zhang, Jingyi and Song, YuXin and Wu, Wenhao and Tao, Dacheng},
  title     = {{MMReason}: An Open-Ended Multi-Modal Multi-Step Reasoning Benchmark for {MLLMs} Toward {AGI}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {273--283},
}
SimMLM: A Simple Framework for Multi-modal Learning with Missing Modality: Sijie Li,

Chen Chen,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Sijie and Chen, Chen and Han, Jungong},
  title     = {{SimMLM}: A Simple Framework for Multi-modal Learning with Missing Modality},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24068--24077},
}
Dual Reciprocal Learning of Language-based Human Motion Understanding and Generation: Chen Liang,

Zhicheng Shi,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Chen and Shi, Zhicheng and Wang, Wenguan and Yang, Yi},
  title     = {Dual Reciprocal Learning of Language-based Human Motion Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6252--6262},
}
InteractAvatar: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable Gaussians: Kefan Chen,

Sreyas Mohan,

Justin Theiss,

Sergiu Oprea,

Srinath Sridhar,

Aayush Prakash; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kefan and Mohan, Sreyas and Theiss, Justin and Oprea, Sergiu and Sridhar, Srinath and Prakash, Aayush},
  title     = {{InteractAvatar}: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable {Gaussians}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10410--10420},
}
LINR-PCGC: Lossless Implicit Neural Representations for Point Cloud Geometry Compression: Wenjie Huang,

Qi Yang,

Shuting Xia,

He Huang,

Yiling Xu,

Zhu Li; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Wenjie and Yang, Qi and Xia, Shuting and Huang, He and Xu, Yiling and Li, Zhu},
  title     = {{LINR-PCGC}: Lossless Implicit Neural Representations for Point Cloud Geometry Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28577--28586},
}
RoCo-Sim: Enhancing Roadside Collaborative Perception through Foreground Simulation: Yuwen Du,

Anning Hu,

Zichen Chao,

Yifan Lu,

Junhao Ge,

Genjia Liu,

Weitao Wu,

Lanjun Wang,

Siheng Chen; [pdf]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuwen and Hu, Anning and Chao, Zichen and Lu, Yifan and Ge, Junhao and Liu, Genjia and Wu, Weitao and Wang, Lanjun and Chen, Siheng},
  title     = {{RoCo-Sim}: Enhancing Roadside Collaborative Perception through Foreground Simulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26977--26986},
}
UKBOB: One Billion MRI Labeled Masks for Generalizable 3D Medical Image Segmentation: Emmanuelle Bourigault,

Amir Jamaludin,

Abdullah Hamdi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bourigault_2025_ICCV,
  author    = {Bourigault, Emmanuelle and Jamaludin, Amir and Hamdi, Abdullah},
  title     = {{UKBOB}: One Billion {MRI} Labeled Masks for Generalizable {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21600--21611},
}
To Label or Not to Label: PALM - A Predictive Model for Evaluating Sample Efficiency in Active Learning Models: Julia Machnio,

Mads Nielsen,

Mostafa Mehdipour Ghazi; [pdf] [supp]
[bibtex]
@InProceedings{Machnio_2025_ICCV,
  author    = {Machnio, Julia and Nielsen, Mads and Ghazi, Mostafa Mehdipour},
  title     = {To Label or Not to Label: {PALM} - A Predictive Model for Evaluating Sample Efficiency in Active Learning Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4039--4048},
}
GaussianOcc: Fully Self-supervised and Efficient 3D Occupancy Estimation with Gaussian Splatting: Wanshui Gan,

Fang Liu,

Hongbin Xu,

Ningkai Mo,

Naoto Yokoya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_ICCV,
  author    = {Gan, Wanshui and Liu, Fang and Xu, Hongbin and Mo, Ningkai and Yokoya, Naoto},
  title     = {{GaussianOcc}: Fully Self-supervised and Efficient {3D} Occupancy Estimation with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28980--28990},
}
Free-MoRef: Instantly Multiplexing Context Perception Capabilities of Video-MLLMs within Single Inference: Kuo Wang,

Quanlong Zheng,

Junlin Xie,

Yanhao Zhang,

Jinguo Luo,

Haonan Lu,

Liang Lin,

Fan Zhou,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Kuo and Zheng, Quanlong and Xie, Junlin and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Lin, Liang and Zhou, Fan and Li, Guanbin},
  title     = {{Free-MoRef}: Instantly Multiplexing Context Perception Capabilities of {Video-MLLMs} within Single Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22499--22508},
}
DC-ControlNet: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models: Hongji Yang,

Wencheng Han,

Yucheng Zhou,

Jianbing Shen; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hongji and Han, Wencheng and Zhou, Yucheng and Shen, Jianbing},
  title     = {{DC-ControlNet}: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19065--19074},
}
EfficientMT: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models: Yufei Cai,

Hu Han,

Yuxiang Wei,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yufei and Han, Hu and Wei, Yuxiang and Shan, Shiguang and Chen, Xilin},
  title     = {{EfficientMT}: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10592--10601},
}
Dynamic Multimodal Prototype Learning in Vision-Language Models: Xingyu Zhu,

Shuo Wang,

Beier Zhu,

Miaoge Li,

Yunfan Li,

Junfeng Fang,

Zhicai Wang,

Dongsheng Wang,

Hanwang Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Xingyu and Wang, Shuo and Zhu, Beier and Li, Miaoge and Li, Yunfan and Fang, Junfeng and Wang, Zhicai and Wang, Dongsheng and Zhang, Hanwang},
  title     = {Dynamic Multimodal Prototype Learning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2501--2511},
}
Liberated-GS: 3D Gaussian Splatting Independent from SfM Point Clouds: Weihong Pan,

Xiaoyu Zhang,

Hongjia Zhai,

Xiaojun Xiang,

Hanqing Jiang,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Weihong and Zhang, Xiaoyu and Zhai, Hongjia and Xiang, Xiaojun and Jiang, Hanqing and Zhang, Guofeng},
  title     = {{Liberated-GS}: {3D} {Gaussian} Splatting Independent from {SfM} Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26675--26685},
}
Client2Vec: Improving Federated Learning by Distribution Shifts Aware Client Indexing: Yongxin Guo,

Lin Wang,

Xiaoying Tang,

Tao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yongxin and Wang, Lin and Tang, Xiaoying and Lin, Tao},
  title     = {{Client2Vec}: Improving Federated Learning by Distribution Shifts Aware Client Indexing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1433--1443},
}
FuXi-RTM: A Physics-Guided Prediction Framework with Radiative Transfer Modeling: Qiusheng Huang,

Xiaohui Zhong,

Xu Fan,

Hao Li; [pdf]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Qiusheng and Zhong, Xiaohui and Fan, Xu and Li, Hao},
  title     = {{FuXi-RTM}: A Physics-Guided Prediction Framework with Radiative Transfer Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8852--8862},
}
Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions: Dong Li,

Chunhui Luo,

Yuanfei Bao,

Gang Yang,

Jie Xiao,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Dong and Luo, Chunhui and Bao, Yuanfei and Yang, Gang and Xiao, Jie and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10908--10918},
}
Loss Functions for Predictor-based Neural Architecture Search: Han Ji,

Yuqi Feng,

Jiahao Fan,

Yanan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan},
  title     = {Loss Functions for Predictor-based Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1624--1633},
}
Learning to Generalize without Bias for Open-Vocabulary Action Recognition: Yating Yu,

Congqi Cao,

Yifan Zhang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yating and Cao, Congqi and Zhang, Yifan and Zhang, Yanning},
  title     = {Learning to Generalize without Bias for Open-Vocabulary Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12800--12810},
}
Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions: Thomas Dagès,

Michael Lindenbaum,

Alfred M. Bruckstein; [pdf] [supp]
[bibtex]
@InProceedings{Dages_2025_ICCV,
  author    = {Dag{\`e}s, Thomas and Lindenbaum, Michael and Bruckstein, Alfred M.},
  title     = {Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13974--13984},
}
SDMatte: Grafting Diffusion Models for Interactive Matting: Longfei Huang,

Yu Liang,

Hao Zhang,

Jinwei Chen,

Wei Dong,

Lunde Chen,

Wanyu Liu,

Bo Li,

Peng-Tao Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Longfei and Liang, Yu and Zhang, Hao and Chen, Jinwei and Dong, Wei and Chen, Lunde and Liu, Wanyu and Li, Bo and Jiang, Peng-Tao},
  title     = {{SDMatte}: Grafting Diffusion Models for Interactive Matting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15229--15239},
}
Scaling Inference-Time Search with Vision Value Model for Improved Visual Comprehension: Xiyao Wang,

Zhengyuan Yang,

Linjie Li,

Hongjin Lu,

Yuancheng Xu,

Chung-Ching Lin,

Kevin Lin,

Furong Huang,

Lijuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiyao and Yang, Zhengyuan and Li, Linjie and Lu, Hongjin and Xu, Yuancheng and Lin, Chung-Ching and Lin, Kevin and Huang, Furong and Wang, Lijuan},
  title     = {Scaling Inference-Time Search with Vision Value Model for Improved Visual Comprehension},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1173--1184},
}
Cooperative Pseudo Labeling for Unsupervised Federated Classification: Kuangpu Guo,

Lijun Sheng,

Yongcan Yu,

Jian Liang,

Zilei Wang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Kuangpu and Sheng, Lijun and Yu, Yongcan and Liang, Jian and Wang, Zilei and He, Ran},
  title     = {Cooperative Pseudo Labeling for Unsupervised Federated Classification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3326--3336},
}
FreeDNA: Endowing Domain Adaptation of Diffusion-Based Dense Prediction with Training-Free Domain Noise Alignment: Hang Xu,

Jie Huang,

Linjiang Huang,

Dong Li,

Yidi Liu,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Hang and Huang, Jie and Huang, Linjiang and Li, Dong and Liu, Yidi and Zhao, Feng},
  title     = {{FreeDNA}: Endowing Domain Adaptation of Diffusion-Based Dense Prediction with Training-Free Domain Noise Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3268--3279},
}
DOLLAR: Few-Step Video Generation via Distillation and Latent Reward Optimization: Zihan Ding,

Chi Jin,

Difan Liu,

Haitian Zheng,

Krishna Kumar Singh,

Qiang Zhang,

Yan Kang,

Zhe Lin,

Yuchen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Zihan and Jin, Chi and Liu, Difan and Zheng, Haitian and Singh, Krishna Kumar and Zhang, Qiang and Kang, Yan and Lin, Zhe and Liu, Yuchen},
  title     = {{DOLLAR}: Few-Step Video Generation via Distillation and Latent Reward Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17961--17971},
}
Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers: Yunshan Zhong,

Yuyao Zhou,

Yuxin Zhang,

Wanchen Sui,

Shen Li,

Yong Li,

Fei Chao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yunshan and Zhou, Yuyao and Zhang, Yuxin and Sui, Wanchen and Li, Shen and Li, Yong and Chao, Fei and Ji, Rongrong},
  title     = {Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12479--12490},
}
Zero-AVSR: Zero-Shot Audio-Visual Speech Recognition with LLMs by Learning Language-Agnostic Speech Representations: Jeong Hun Yeo,

Minsu Kim,

Chae Won Kim,

Stavros Petridis,

Yong Man Ro; [pdf] [supp]
[bibtex]
@InProceedings{Yeo_2025_ICCV,
  author    = {Yeo, Jeong Hun and Kim, Minsu and Kim, Chae Won and Petridis, Stavros and Ro, Yong Man},
  title     = {{Zero-AVSR}: Zero-Shot Audio-Visual Speech Recognition with {LLMs} by Learning Language-Agnostic Speech Representations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6693--6703},
}
Supervised Exploratory Learning for Long-Tailed Visual Recognition: Zhongquan Jian,

Yanhao Chen,

Yancheng Wang,

Junfeng Yao,

Meihong Wang,

Qingqiang Wu; [pdf]
[bibtex]
@InProceedings{Jian_2025_ICCV,
  author    = {Jian, Zhongquan and Chen, Yanhao and Wang, Yancheng and Yao, Junfeng and Wang, Meihong and Wu, Qingqiang},
  title     = {Supervised Exploratory Learning for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1870--1880},
}
Enhanced Event-based Dense Stereo via Cross-Sensor Knowledge Distillation: Haihao Zhang,

Yunjian Zhang,

Jianing Li,

Lin Zhu,

Meng Lv,

Yao Zhu,

Yanwei Liu,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Haihao and Zhang, Yunjian and Li, Jianing and Zhu, Lin and Lv, Meng and Zhu, Yao and Liu, Yanwei and Ji, Xiangyang},
  title     = {Enhanced Event-based Dense Stereo via Cross-Sensor Knowledge Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5437--5447},
}
Disentangled World Models: Learning to Transfer Semantic Knowledge from Distracting Videos for Reinforcement Learning: Qi Wang,

Zhipeng Zhang,

Baao Xie,

Xin Jin,

Yunbo Wang,

Shiyu Wang,

Liaomo Zheng,

Xiaokang Yang,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qi and Zhang, Zhipeng and Xie, Baao and Jin, Xin and Wang, Yunbo and Wang, Shiyu and Zheng, Liaomo and Yang, Xiaokang and Zeng, Wenjun},
  title     = {Disentangled World Models: Learning to Transfer Semantic Knowledge from Distracting Videos for Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2599--2608},
}
StreamDiffusion: A Pipeline-level Solution for Real-Time Interactive Generation: Akio Kodaira,

Chenfeng Xu,

Toshiki Hazama,

Takanori Yoshimoto,

Kohei Ohno,

Shogo Mitsuhori,

Soichi Sugano,

Hanying Cho,

Zhijian Liu,

Masayoshi Tomizuka,

Kurt Keutzer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kodaira_2025_ICCV,
  author    = {Kodaira, Akio and Xu, Chenfeng and Hazama, Toshiki and Yoshimoto, Takanori and Ohno, Kohei and Mitsuhori, Shogo and Sugano, Soichi and Cho, Hanying and Liu, Zhijian and Tomizuka, Masayoshi and Keutzer, Kurt},
  title     = {{StreamDiffusion}: A Pipeline-level Solution for Real-Time Interactive Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12371--12380},
}
Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model: Xuan Han,

Yihao Zhao,

Yanhao Ge,

Mingyu You; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Xuan and Zhao, Yihao and Ge, Yanhao and You, Mingyu},
  title     = {Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16938--16947}
}
When Large Vision-Language Model Meets Large Remote Sensing Imagery: Coarse-to-Fine Text-Guided Token Pruning: Junwei Luo,

Yingying Zhang,

Xue Yang,

Kang Wu,

Qi Zhu,

Lei Liang,

Jingdong Chen,

Yansheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Junwei and Zhang, Yingying and Yang, Xue and Wu, Kang and Zhu, Qi and Liang, Lei and Chen, Jingdong and Li, Yansheng},
  title     = {When Large Vision-Language Model Meets Large Remote Sensing Imagery: Coarse-to-Fine Text-Guided Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9206--9217}
}
StreamMind: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition: Xin Ding,

Hao Wu,

Yifan Yang,

Shiqi Jiang,

Qianxi Zhang,

Donglin Bai,

Zhibo Chen,

Ting Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Xin and Wu, Hao and Yang, Yifan and Jiang, Shiqi and Zhang, Qianxi and Bai, Donglin and Chen, Zhibo and Cao, Ting},
  title     = {{StreamMind}: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13448--13459}
}
DecAD: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection: Xiaolei Wang,

Xiaoyang Wang,

Huihui Bai,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiaolei and Wang, Xiaoyang and Bai, Huihui and Lim, Eng Gee and Xiao, Jimin},
  title     = {{DecAD}: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21568--21577}
}
Discovering Divergent Representations between Text-to-Image Models: Lisa Dunlap,

Joseph E. Gonzalez,

Trevor Darrell,

Fabian Caba Heilbron,

Josef Sivic,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dunlap_2025_ICCV,
  author    = {Dunlap, Lisa and Gonzalez, Joseph E. and Darrell, Trevor and Caba Heilbron, Fabian and Sivic, Josef and Russell, Bryan},
  title     = {Discovering Divergent Representations between Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17516--17525}
}
Kaputt: A Large-Scale Dataset for Visual Defect Detection: Sebastian Höfer,

Dorian F. Henning,

Artemij Amiranashvili,

Douglas Morrison,

Mariliza Tzes,

Ingmar Posner,

Marc Matvienko,

Alessandro Rennola,

Anton Milan; [pdf] [supp]
[bibtex]
@InProceedings{Hofer_2025_ICCV,
  author    = {H{\"o}fer, Sebastian and Henning, Dorian F. and Amiranashvili, Artemij and Morrison, Douglas and Tzes, Mariliza and Posner, Ingmar and Matvienko, Marc and Rennola, Alessandro and Milan, Anton},
  title     = {Kaputt: A Large-Scale Dataset for Visual Defect Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24224--24233}
}
Self-Ensembling Gaussian Splatting for Few-Shot Novel View Synthesis: Chen Zhao,

Xuan Wang,

Tong Zhang,

Saqib Javed,

Mathieu Salzmann; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Chen and Wang, Xuan and Zhang, Tong and Javed, Saqib and Salzmann, Mathieu},
  title     = {Self-Ensembling {Gaussian} Splatting for Few-Shot Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4940--4950}
}
Spectral Image Tokenizer: Carlos Esteves,

Mohammed Suhail,

Ameesh Makadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Esteves_2025_ICCV,
  author    = {Esteves, Carlos and Suhail, Mohammed and Makadia, Ameesh},
  title     = {Spectral Image Tokenizer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17181--17190}
}
GameFactory: Creating New Games with Generative Interactive Videos: Jiwen Yu,

Yiran Qin,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jiwen and Qin, Yiran and Wang, Xintao and Wan, Pengfei and Zhang, Di and Liu, Xihui},
  title     = {{GameFactory}: Creating New Games with Generative Interactive Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11590--11599}
}
Occupancy Learning with Spatiotemporal Memory: Ziyang Leng,

Jiawei Yang,

Wenlong Yi,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Ziyang and Yang, Jiawei and Yi, Wenlong and Zhou, Bolei},
  title     = {Occupancy Learning with Spatiotemporal Memory},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26569--26578}
}
Understanding Co-speech Gestures in-the-wild: Sindhu B Hegde,

K R Prajwal,

Taein Kwon,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hegde_2025_ICCV,
  author    = {Hegde, Sindhu B and Prajwal, K R and Kwon, Taein and Zisserman, Andrew},
  title     = {Understanding Co-speech Gestures in-the-wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9977--9987}
}
CAD-Recode: Reverse Engineering CAD Code from Point Clouds: Danila Rukhovich,

Elona Dupont,

Dimitrios Mallis,

Kseniya Cherenkova,

Anis Kacem,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Rukhovich_2025_ICCV,
  author    = {Rukhovich, Danila and Dupont, Elona and Mallis, Dimitrios and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila},
  title     = {{CAD-Recode}: Reverse Engineering {CAD} Code from Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9801--9811}
}
ObjectRelator: Enabling Cross-View Object Relation Understanding Across Ego-Centric and Exo-Centric Perspectives: Yuqian Fu,

Runze Wang,

Bin Ren,

Guolei Sun,

Biao Gong,

Yanwei Fu,

Danda Pani Paudel,

Xuanjing Huang,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Yuqian and Wang, Runze and Ren, Bin and Sun, Guolei and Gong, Biao and Fu, Yanwei and Paudel, Danda Pani and Huang, Xuanjing and Van Gool, Luc},
  title     = {{ObjectRelator}: Enabling Cross-View Object Relation Understanding Across Ego-Centric and Exo-Centric Perspectives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6530--6540}
}
Tune-Your-Style: Intensity-tunable 3D Style Transfer with Gaussian Splatting: Yian Zhao,

Rushi Ye,

Ruochong Zheng,

Zesen Cheng,

Chaoran Feng,

Jiashu Yang,

Pengchong Qiao,

Chang Liu,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yian and Ye, Rushi and Zheng, Ruochong and Cheng, Zesen and Feng, Chaoran and Yang, Jiashu and Qiao, Pengchong and Liu, Chang and Chen, Jie},
  title     = {{Tune-Your-Style}: Intensity-tunable {3D} Style Transfer with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19032--19042}
}
Stylized-Face: A Million-level Stylized Face Dataset for Face Recognition: Zhengyuan Peng,

Jianqing Xu,

Yuge Huang,

Jinkun Hao,

Shouhong Ding,

Zhizhong Zhang,

Xin Tan,

Lizhuang Ma; [pdf]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Zhengyuan and Xu, Jianqing and Huang, Yuge and Hao, Jinkun and Ding, Shouhong and Zhang, Zhizhong and Tan, Xin and Ma, Lizhuang},
  title     = {{Stylized-Face}: A Million-level Stylized Face Dataset for Face Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13053--13064}
}
Bring Your Rear Cameras for Egocentric 3D Human Pose Estimation: Hiroyasu Akada,

Jian Wang,

Vladislav Golyanik,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akada_2025_ICCV,
  author    = {Akada, Hiroyasu and Wang, Jian and Golyanik, Vladislav and Theobalt, Christian},
  title     = {Bring Your Rear Cameras for Egocentric {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9497--9507}
}
ReferEverything: Towards Segmenting Everything We Can Speak of in Videos: Anurag Bagchi,

Zhipeng Bao,

Yu-Xiong Wang,

Pavel Tokmakov,

Martial Hebert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bagchi_2025_ICCV,
  author    = {Bagchi, Anurag and Bao, Zhipeng and Wang, Yu-Xiong and Tokmakov, Pavel and Hebert, Martial},
  title     = {{ReferEverything}: Towards Segmenting Everything We Can Speak of in Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23221--23231}
}
GestureHYDRA: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation: Quanwei Yang,

Luying Huang,

Kaisiyuan Wang,

Jiazhi Guan,

Shengyi He,

Fengguo Li,

Hang Zhou,

Lingyun Yu,

Yingying Li,

Haocheng Feng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Quanwei and Huang, Luying and Wang, Kaisiyuan and Guan, Jiazhi and He, Shengyi and Li, Fengguo and Zhou, Hang and Yu, Lingyun and Li, Yingying and Feng, Haocheng and Xie, Hongtao},
  title     = {{GestureHYDRA}: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12615--12625}
}
Make Your Training Flexible: Towards Deployment-Efficient Video Models: Chenting Wang,

Kunchang Li,

Tianxiang Jiang,

Xiangyu Zeng,

Yi Wang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chenting and Li, Kunchang and Jiang, Tianxiang and Zeng, Xiangyu and Wang, Yi and Wang, Limin},
  title     = {Make Your Training Flexible: Towards Deployment-Efficient Video Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23880--23891}
}
Federated Continual Instruction Tuning: Haiyang Guo,

Fanhu Zeng,

Fei Zhu,

Wenzhuo Liu,

Da-Han Wang,

Jian Xu,

Xu-Yao Zhang,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Haiyang and Zeng, Fanhu and Zhu, Fei and Liu, Wenzhuo and Wang, Da-Han and Xu, Jian and Zhang, Xu-Yao and Liu, Cheng-Lin},
  title     = {Federated Continual Instruction Tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1325--1335}
}
HUST: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization: Zimin Ran,

Xingyu Ren,

Xiang An,

Kaicheng Yang,

Ziyong Feng,

Jing Yang,

Rolandos Alexandros Potamias,

Linchao Zhu,

Jiankang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Ran_2025_ICCV,
  author    = {Ran, Zimin and Ren, Xingyu and An, Xiang and Yang, Kaicheng and Feng, Ziyong and Yang, Jing and Potamias, Rolandos Alexandros and Zhu, Linchao and Deng, Jiankang},
  title     = {{HUST}: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13523--13532}
}
DLFR-Gen: Diffusion-based Video Generation with Dynamic Latent Frame Rate: Zhihang Yuan,

Rui Xie,

Yuzhang Shang,

Hanling Zhang,

Siyuan Wang,

Shengen Yan,

Guohao Dai,

Yu Wang; [pdf]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhihang and Xie, Rui and Shang, Yuzhang and Zhang, Hanling and Wang, Siyuan and Yan, Shengen and Dai, Guohao and Wang, Yu},
  title     = {{DLFR-Gen}: Diffusion-based Video Generation with Dynamic Latent Frame Rate},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16410--16419}
}
What's in a Latent? Leveraging Diffusion Latent Space for Domain Generalization: Xavier Thomas,

Deepti Ghadiyaram; [pdf] [supp]
[bibtex]
@InProceedings{Thomas_2025_ICCV,
  author    = {Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {What's in a Latent? Leveraging Diffusion Latent Space for Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2183--2194}
}
DiffSim: Taming Diffusion Models for Evaluating Visual Similarity: Yiren Song,

Xiaokang Liu,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yiren and Liu, Xiaokang and Shou, Mike Zheng},
  title     = {{DiffSim}: Taming Diffusion Models for Evaluating Visual Similarity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16904--16915}
}
Controllable Feature Whitening for Hyperparameter-Free Bias Mitigation: Yooshin Cho,

Hanbyel Cho,

Janghyeon Lee,

HyeongGwon Hong,

Jaesung Ahn,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Yooshin and Cho, Hanbyel and Lee, Janghyeon and Hong, HyeongGwon and Ahn, Jaesung and Kim, Junmo},
  title     = {Controllable Feature Whitening for Hyperparameter-Free Bias Mitigation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4550--4560}
}
iManip: Skill-Incremental Learning for Robotic Manipulation: Zexin Zheng,

Jia-Feng Cai,

Xiao-Ming Wu,

Yi-Lin Wei,

Yu-Ming Tang,

Ancong Wu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zexin and Cai, Jia-Feng and Wu, Xiao-Ming and Wei, Yi-Lin and Tang, Yu-Ming and Wu, Ancong and Zheng, Wei-Shi},
  title     = {{iManip}: Skill-Incremental Learning for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13890--13900}
}
RadarSplat: Radar Gaussian Splatting for High-Fidelity Data Synthesis and 3D Reconstruction of Autonomous Driving Scenes: Pou-Chun Kung,

Skanda Harisha,

Ram Vasudevan,

Aline Eid,

Katherine A. Skinner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Pou-Chun and Harisha, Skanda and Vasudevan, Ram and Eid, Aline and Skinner, Katherine A.},
  title     = {{RadarSplat}: Radar {Gaussian} Splatting for High-Fidelity Data Synthesis and {3D} Reconstruction of Autonomous Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27596--27606}
}
Hierarchical 3D Scene Graphs Construction Outdoors: Jon Nyffeler,

Federico Tombari,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Nyffeler_2025_ICCV,
  author    = {Nyffeler, Jon and Tombari, Federico and Barath, Daniel},
  title     = {Hierarchical {3D} Scene Graphs Construction Outdoors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26817--26826}
}
SIC: Similarity-Based Interpretable Image Classification with Neural Networks: Tom Nuno Wolf,

Emre Kavak,

Fabian Bongratz,

Christian Wachinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wolf_2025_ICCV,
  author    = {Wolf, Tom Nuno and Kavak, Emre and Bongratz, Fabian and Wachinger, Christian},
  title     = {{SIC}: Similarity-Based Interpretable Image Classification with Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24276--24285}
}
Towards Cross-modal Backward-compatible Representation Learning for Vision-Language Models: Young Kyun Jang,

Ser-nam Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, Young Kyun and Lim, Ser-nam},
  title     = {Towards Cross-modal Backward-compatible Representation Learning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1783--1792}
}
Fuse Before Transfer: Knowledge Fusion for Heterogeneous Distillation: Guopeng Li,

Qiang Wang,

Ke Yan,

Shouhong Ding,

Yuan Gao,

Gui-Song Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Guopeng and Wang, Qiang and Yan, Ke and Ding, Shouhong and Gao, Yuan and Xia, Gui-Song},
  title     = {Fuse Before Transfer: Knowledge Fusion for Heterogeneous Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3445--3454}
}
CoLMDriver: LLM-based Negotiation Benefits Cooperative Autonomous Driving: Changxing Liu,

Genjia Liu,

Zijun Wang,

Jinchang Yang,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Changxing and Liu, Genjia and Wang, Zijun and Yang, Jinchang and Chen, Siheng},
  title     = {{CoLMDriver}: {LLM}-based Negotiation Benefits Cooperative Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25951--25960}
}
Dual Domain Control via Active Learning for Remote Sensing Domain Incremental Object Detection: Jiachen Sun,

De Cheng,

Xi Yang,

Nannan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Jiachen and Cheng, De and Yang, Xi and Wang, Nannan},
  title     = {Dual Domain Control via Active Learning for Remote Sensing Domain Incremental Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3809--3818}
}
VideoSetDiff: Identifying and Reasoning Similarities and Differences in Similar Videos: Yue Qiu,

Yanjun Sun,

Takuma Yagi,

Shusaku Egami,

Natsuki Miyata,

Ken Fukuda,

Kensho Hara,

Ryusuke Sagawa; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Yue and Sun, Yanjun and Yagi, Takuma and Egami, Shusaku and Miyata, Natsuki and Fukuda, Ken and Hara, Kensho and Sagawa, Ryusuke},
  title     = {{VideoSetDiff}: Identifying and Reasoning Similarities and Differences in Similar Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12242--12252}
}
GRAB: A Challenging GRaph Analysis Benchmark for Large Multimodal Models: Jonathan Roberts,

Kai Han,

Samuel Albanie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roberts_2025_ICCV,
  author    = {Roberts, Jonathan and Han, Kai and Albanie, Samuel},
  title     = {{GRAB}: A Challenging {GRaph} Analysis Benchmark for Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1644--1654}
}
Transparent Vision: A Theory of Hierarchical Invariant Representations: Shuren Qi,

Yushu Zhang,

Chao Wang,

Zhihua Xia,

Xiaochun Cao,

Fenglei Fan; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_ICCV,
  author    = {Qi, Shuren and Zhang, Yushu and Wang, Chao and Xia, Zhihua and Cao, Xiaochun and Fan, Fenglei},
  title     = {Transparent Vision: A Theory of Hierarchical Invariant Representations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3435--3444}
}
Multi-View 3D Point Tracking: Frano Rajič,

Haofei Xu,

Marko Mihajlovic,

Siyuan Li,

Irem Demir,

Emircan Gündoğdu,

Lei Ke,

Sergey Prokudin,

Marc Pollefeys,

Siyu Tang; [pdf] [supp]
[bibtex]
@InProceedings{Rajic_2025_ICCV,
  author    = {Raji{\v{c}}, Frano and Xu, Haofei and Mihajlovic, Marko and Li, Siyuan and Demir, Irem and G{\"u}ndo{\u{g}}du, Emircan and Ke, Lei and Prokudin, Sergey and Pollefeys, Marc and Tang, Siyu},
  title     = {Multi-View {3D} Point Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {59--68}
}
InsideOut: Integrated RGB-Radiative Gaussian Splatting for Comprehensive 3D Object Representation: Jungmin Lee,

Seonghyuk Hong,

Juyong Lee,

Jaeyoon Lee,

Jongwon Choi; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungmin and Hong, Seonghyuk and Lee, Juyong and Lee, Jaeyoon and Choi, Jongwon},
  title     = {{InsideOut}: Integrated {RGB}-Radiative {Gaussian} Splatting for Comprehensive {3D} Object Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25820--25830}
}
Splat-LOAM: Gaussian Splatting LiDAR Odometry and Mapping: Emanuele Giacomini,

Luca Di Giammarino,

Lorenzo De Rebotti,

Giorgio Grisetti,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Giacomini_2025_ICCV,
  author    = {Giacomini, Emanuele and Di Giammarino, Luca and De Rebotti, Lorenzo and Grisetti, Giorgio and Oswald, Martin R.},
  title     = {{Splat-LOAM}: {Gaussian} Splatting {LiDAR} Odometry and Mapping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27630--27639}
}
LUSD: Localized Update Score Distillation for Text-Guided Image Editing: Worameth Chinchuthakun,

Tossaporn Saengja,

Nontawat Tritrong,

Pitchaporn Rewatbowornwong,

Pramook Khungurn,

Supasorn Suwajanakorn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chinchuthakun_2025_ICCV,
  author    = {Chinchuthakun, Worameth and Saengja, Tossaporn and Tritrong, Nontawat and Rewatbowornwong, Pitchaporn and Khungurn, Pramook and Suwajanakorn, Supasorn},
  title     = {{LUSD}: Localized Update Score Distillation for Text-Guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15298--15307}
}
ChartCap: Mitigating Hallucination of Dense Chart Captioning: Junyoung Lim,

Jaewoo Ahn,

Gunhee Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Junyoung and Ahn, Jaewoo and Kim, Gunhee},
  title     = {{ChartCap}: Mitigating Hallucination of Dense Chart Captioning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13171--13182}
}
RTMap: Real-Time Recursive Mapping with Change Detection and Localization: Yuheng Du,

Sheng Yang,

Lingxuan Wang,

Zhenghua Hou,

Chengying Cai,

Zhitao Tan,

Mingxia Chen,

Shi-Sheng Huang,

Qiang Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuheng and Yang, Sheng and Wang, Lingxuan and Hou, Zhenghua and Cai, Chengying and Tan, Zhitao and Chen, Mingxia and Huang, Shi-Sheng and Li, Qiang},
  title     = {{RTMap}: Real-Time Recursive Mapping with Change Detection and Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28021--28030}
}
Scaling 3D Compositional Models for Robust Classification and Pose Estimation: Xiaoding Yuan,

Guofeng Zhang,

Prakhar Kaushik,

Artur Jesslen,

Adam Kortylewski,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Xiaoding and Zhang, Guofeng and Kaushik, Prakhar and Jesslen, Artur and Kortylewski, Adam and Yuille, Alan},
  title     = {Scaling {3D} Compositional Models for Robust Classification and Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6406--6415}
}
MultiVerse: A Multi-Turn Conversation Benchmark for Evaluating Large Vision and Language Models: Young-Jun Lee,

Byung-Kwan Lee,

Jianshu Zhang,

Yechan Hwang,

Byungsoo Ko,

Han-Gyu Kim,

Dongyu Yao,

Xuankun Rong,

Eojin Joo,

Seung-Ho Han,

Bowon Ko,

Ho-Jin Choi; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Young-Jun and Lee, Byung-Kwan and Zhang, Jianshu and Hwang, Yechan and Ko, Byungsoo and Kim, Han-Gyu and Yao, Dongyu and Rong, Xuankun and Joo, Eojin and Han, Seung-Ho and Ko, Bowon and Choi, Ho-Jin},
  title     = {{MultiVerse}: A Multi-Turn Conversation Benchmark for Evaluating Large Vision and Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {708--719}
}
Contact-Aware Refinement of Human Pose Pseudo-Ground Truth via Bioimpedance Sensing: Maria-Paola Forte,

Nikos Athanasiou,

Giulia Ballardini,

Jan Ulrich Bartels,

Katherine J. Kuchenbecker,

Michael J. Black; [pdf]
[bibtex]
@InProceedings{Forte_2025_ICCV,
  author    = {Forte, Maria-Paola and Athanasiou, Nikos and Ballardini, Giulia and Bartels, Jan Ulrich and Kuchenbecker, Katherine J. and Black, Michael J.},
  title     = {Contact-Aware Refinement of Human Pose Pseudo-Ground Truth via Bioimpedance Sensing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5071--5080}
}
Embodied VideoAgent: Persistent Memory from Egocentric Videos and Embodied Sensors Enables Dynamic Scene Understanding: Yue Fan,

Xiaojian Ma,

Rongpeng Su,

Jun Guo,

Rujie Wu,

Xi Chen,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Yue and Ma, Xiaojian and Su, Rongpeng and Guo, Jun and Wu, Rujie and Chen, Xi and Li, Qing},
  title     = {Embodied {VideoAgent}: Persistent Memory from Egocentric Videos and Embodied Sensors Enables Dynamic Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6342--6352}
}
CuRe: Cultural Gaps in the Long Tail of Text-to-Image Systems: Aniket Rege,

Zinnia Nie,

Mahesh Ramesh,

Unmesh Raskar,

Zhuoran Yu,

Aditya Kusupati,

Yong Jae Lee,

Ramya Korlakai Vinayak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rege_2025_ICCV,
  author    = {Rege, Aniket and Nie, Zinnia and Ramesh, Mahesh and Raskar, Unmesh and Yu, Zhuoran and Kusupati, Aditya and Lee, Yong Jae and Vinayak, Ramya Korlakai},
  title     = {{CuRe}: Cultural Gaps in the Long Tail of Text-to-Image Systems},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15680--15691}
}
Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation: Seogkyu Jeon,

Kibeom Hong,

Hyeran Byun; [pdf] [supp]
[bibtex]
@InProceedings{Jeon_2025_ICCV,
  author    = {Jeon, Seogkyu and Hong, Kibeom and Byun, Hyeran},
  title     = {Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20791--20801}
}
OVG-HQ: Online Video Grounding with Hybrid-modal Queries: Runhao Zeng,

Jiaqi Mao,

Minghao Lai,

Minh Hieu Phan,

Yanjie Dong,

Wei Wang,

Qi Chen,

Xiping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Runhao and Mao, Jiaqi and Lai, Minghao and Phan, Minh Hieu and Dong, Yanjie and Wang, Wei and Chen, Qi and Hu, Xiping},
  title     = {{OVG-HQ}: Online Video Grounding with Hybrid-modal Queries},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21085--21096}
}
ConformalSAM: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction: Danhui Chen,

Ziquan Liu,

Chuxi Yang,

Dan Wang,

Yan Yan,

Yi Xu,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Danhui and Liu, Ziquan and Yang, Chuxi and Wang, Dan and Yan, Yan and Xu, Yi and Ji, Xiangyang},
  title     = {{ConformalSAM}: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24045--24055}
}
V2PE: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding: Junqi Ge,

Ziyi Chen,

Jintao Lin,

Jinguo Zhu,

Xihui Liu,

Jifeng Dai,

Xizhou Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Junqi and Chen, Ziyi and Lin, Jintao and Zhu, Jinguo and Liu, Xihui and Dai, Jifeng and Zhu, Xizhou},
  title     = {{V2PE}: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21070--21084}
}
PRO-VPT: Distribution-Adaptive Visual Prompt Tuning via Prompt Relocation: Chikai Shang,

Mengke Li,

Yiqun Zhang,

Zhen Chen,

Jinlin Wu,

Fangqing Gu,

Yang Lu,

Yiu-Ming Cheung; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2025_ICCV,
  author    = {Shang, Chikai and Li, Mengke and Zhang, Yiqun and Chen, Zhen and Wu, Jinlin and Gu, Fangqing and Lu, Yang and Cheung, Yiu-Ming},
  title     = {{PRO-VPT}: Distribution-Adaptive Visual Prompt Tuning via Prompt Relocation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1558--1568}
}
CoMPaSS: Enhancing Spatial Understanding in Text-to-Image Diffusion Models: Gaoyang Zhang,

Bingtao Fu,

Qingnan Fan,

Qi Zhang,

Runxing Liu,

Hong Gu,

Huaqi Zhang,

Xinguo Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Gaoyang and Fu, Bingtao and Fan, Qingnan and Zhang, Qi and Liu, Runxing and Gu, Hong and Zhang, Huaqi and Liu, Xinguo},
  title     = {{CoMPaSS}: Enhancing Spatial Understanding in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15253--15265}
}
AdaDrive: Self-Adaptive Slow-Fast System for Language-Grounded Autonomous Driving: Ruifei Zhang,

Junlin Xie,

Wei Zhang,

Weikai Chen,

Xiao Tan,

Xiang Wan,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruifei and Xie, Junlin and Zhang, Wei and Chen, Weikai and Tan, Xiao and Wan, Xiang and Li, Guanbin},
  title     = {{AdaDrive}: Self-Adaptive Slow-Fast System for Language-Grounded Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5112--5121}
}
Multi-modal Segment Anything Model for Camouflaged Scene Segmentation: Guangyu Ren,

Hengyan Liu,

Michalis Lazarou,

Tania Stathaki; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Guangyu and Liu, Hengyan and Lazarou, Michalis and Stathaki, Tania},
  title     = {Multi-modal Segment Anything Model for Camouflaged Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19882--19892}
}
LLaVA-PruMerge: Adaptive Token Reduction for Efficient Large Multimodal Models: Yuzhang Shang,

Mu Cai,

Bingxin Xu,

Yong Jae Lee,

Yan Yan; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2025_ICCV,
  author    = {Shang, Yuzhang and Cai, Mu and Xu, Bingxin and Lee, Yong Jae and Yan, Yan},
  title     = {{LLaVA-PruMerge}: Adaptive Token Reduction for Efficient Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22857--22867}
}
Event-boosted Deformable 3D Gaussians for Dynamic Scene Reconstruction: Wenhao Xu,

Wenming Weng,

Yueyi Zhang,

Ruikang Xu,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wenhao and Weng, Wenming and Zhang, Yueyi and Xu, Ruikang and Xiong, Zhiwei},
  title     = {Event-boosted Deformable {3D} {Gaussians} for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28334--28343}
}
Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion: Jiawei Liang,

Siyuan Liang,

Tianrui Lou,

Ming Zhang,

Wenjin Li,

Dunqiu Fan,

Xiaochun Cao; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Jiawei and Liang, Siyuan and Lou, Tianrui and Zhang, Ming and Li, Wenjin and Fan, Dunqiu and Cao, Xiaochun},
  title     = {Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13880--13889}
}
Stereo Any Video: Temporally Consistent Stereo Matching: Junpeng Jing,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jing_2025_ICCV,
  author    = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {{Stereo Any Video}: Temporally Consistent Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20836--20846}
}
ASCENT: Annotation-free Self-supervised Contrastive Embeddings for 3D Neuron Tracking in Fluorescence Microscopy: Haejun Han,

Hang Lu; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Haejun and Lu, Hang},
  title     = {{ASCENT}: Annotation-free Self-supervised Contrastive Embeddings for {3D} Neuron Tracking in Fluorescence Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14676--14687}
}
ViLU: Learning Vision-Language Uncertainties for Failure Prediction: Marc Lafon,

Yannis Karmim,

Julio Silva-Rodríguez,

Paul Couairon,

Clément Rambour,

Raphael Fournier-Sniehotta,

Ismail Ben Ayed,

Jose Dolz,

Nicolas Thome; [pdf] [supp]
[bibtex]
@InProceedings{Lafon_2025_ICCV,
  author    = {Lafon, Marc and Karmim, Yannis and Silva-Rodr{\'\i}guez, Julio and Couairon, Paul and Rambour, Cl{\'e}ment and Fournier-Sniehotta, Raphael and Ben Ayed, Ismail and Dolz, Jose and Thome, Nicolas},
  title     = {{ViLU}: Learning Vision-Language Uncertainties for Failure Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17807--17817}
}
A Linear N-Point Solver for Structure and Motion from Asynchronous Tracks: Hang Su,

Yunlong Feng,

Daniel Gehrig,

Panfeng Jiang,

Ling Gao,

Xavier Lagorce,

Laurent Kneip; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Hang and Feng, Yunlong and Gehrig, Daniel and Jiang, Panfeng and Gao, Ling and Lagorce, Xavier and Kneip, Laurent},
  title     = {A Linear {N-Point} Solver for Structure and Motion from Asynchronous Tracks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4339--4348}
}
Constraint-Aware Feature Learning for Parametric Point Cloud: Xi Cheng,

Ruiqi Lei,

Di Huang,

Zhichao Liao,

Fengyuan Piao,

Yan Chen,

Pingfa Feng,

Long Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Xi and Lei, Ruiqi and Huang, Di and Liao, Zhichao and Piao, Fengyuan and Chen, Yan and Feng, Pingfa and Zeng, Long},
  title     = {Constraint-Aware Feature Learning for Parametric Point Cloud},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28114--28124}
}
Unleashing Vecset Diffusion Model for Fast Shape Generation: Zeqiang Lai,

Yunfei Zhao,

Zibo Zhao,

Haolin Liu,

Fuyun Wang,

Huiwen Shi,

Xianghui Yang,

Qingxiang Lin,

Jingwei Huang,

Yuhong Liu,

Jie Jiang,

Chunchao Guo,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_ICCV,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Liu, Haolin and Wang, Fuyun and Shi, Huiwen and Yang, Xianghui and Lin, Qingxiang and Huang, Jingwei and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Yue, Xiangyu},
  title     = {Unleashing Vecset Diffusion Model for Fast Shape Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2523--2533}
}
Revisiting Point Cloud Completion: Are We Ready For The Real-World?: Stuti Pathak,

Prashant Kumar,

Dheeraj Baiju,

Nicholus Mboga,

Gunther Steenackers,

Rudi Penne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2025_ICCV,
  author    = {Pathak, Stuti and Kumar, Prashant and Baiju, Dheeraj and Mboga, Nicholus and Steenackers, Gunther and Penne, Rudi},
  title     = {Revisiting Point Cloud Completion: Are We Ready For The Real-World?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25388--25398}
}
RayZer: A Self-supervised Large View Synthesis Model: Hanwen Jiang,

Hao Tan,

Peng Wang,

Haian Jin,

Yue Zhao,

Sai Bi,

Kai Zhang,

Fujun Luan,

Kalyan Sunkavalli,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Hanwen and Tan, Hao and Wang, Peng and Jin, Haian and Zhao, Yue and Bi, Sai and Zhang, Kai and Luan, Fujun and Sunkavalli, Kalyan and Huang, Qixing and Pavlakos, Georgios},
  title     = {{RayZer}: A Self-supervised Large View Synthesis Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4918--4929}
}
Perception-as-Control: Fine-grained Controllable Image Animation with 3D-aware Motion Representation: Yingjie Chen,

Yifang Men,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingjie and Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng},
  title     = {Perception-as-Control: Fine-grained Controllable Image Animation with {3D-aware} Motion Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14380--14389}
}
Structure-aware Semantic Discrepancy and Consistency for 3D Medical Image Self-supervised Learning: Tan Pan,

Zhaorui Tan,

Kaiyu Guo,

Dongli Xu,

Weidi Xu,

Chen Jiang,

Xin Guo,

Yuan Qi,

Yuan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Tan and Tan, Zhaorui and Guo, Kaiyu and Xu, Dongli and Xu, Weidi and Jiang, Chen and Guo, Xin and Qi, Yuan and Cheng, Yuan},
  title     = {Structure-aware Semantic Discrepancy and Consistency for {3D} Medical Image Self-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20257--20267}
}
Do It Yourself: Learning Semantic Correspondence from Pseudo-Labels: Olaf Dünkel,

Thomas Wimmer,

Christian Theobalt,

Christian Rupprecht,

Adam Kortylewski; [pdf] [supp]
[bibtex]
@InProceedings{Dunkel_2025_ICCV,
  author    = {D{\"u}nkel, Olaf and Wimmer, Thomas and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam},
  title     = {Do It Yourself: Learning Semantic Correspondence from Pseudo-Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5834--5844}
}
SA-LUT: Spatial Adaptive 4D Look-Up Table for Photorealistic Style Transfer: Zerui Gong,

Zhonghua Wu,

Qingyi Tao,

Qinyue Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Zerui and Wu, Zhonghua and Tao, Qingyi and Li, Qinyue and Loy, Chen Change},
  title     = {{SA-LUT}: Spatial Adaptive {4D} Look-Up Table for Photorealistic Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18294--18303}
}
DuET: Dual Incremental Object Detection via Exemplar-Free Task Arithmetic: Munish Monga,

Vishal Chudasama,

Pankaj Wasnik,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Monga_2025_ICCV,
  author    = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Banerjee, Biplab},
  title     = {{DuET}: Dual Incremental Object Detection via Exemplar-Free Task Arithmetic},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3121--3131}
}
BATCLIP: Bimodal Online Test-Time Adaptation for CLIP: Sarthak Maharana,

Baoming Zhang,

Leonid Karlinsky,

Rogerio Feris,

Yunhui Guo; [pdf] [supp]
[bibtex]
@InProceedings{Maharana_2025_ICCV,
  author    = {Maharana, Sarthak and Zhang, Baoming and Karlinsky, Leonid and Feris, Rogerio and Guo, Yunhui},
  title     = {{BATCLIP}: Bimodal Online Test-Time Adaptation for {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1569--1579}
}
MagShield: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances: Yunzhe Shao,

Xinyu Yi,

Lu Yin,

Shihui Guo,

Junhai Yong,

Feng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Yunzhe and Yi, Xinyu and Yin, Lu and Guo, Shihui and Yong, Junhai and Xu, Feng},
  title     = {{MagShield}: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29021--29030}
}
SHIFT: Smoothing Hallucinations by Information Flow Tuning for Multimodal Large Language Models: Sudong Wang,

Yunjian Zhang,

Yao Zhu,

Enci Liu,

Jianing Li,

Yanwei Liu,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Liu, Enci and Li, Jianing and Liu, Yanwei and Ji, Xiangyang},
  title     = {{SHIFT}: Smoothing Hallucinations by Information Flow Tuning for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3639--3649}
}
Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows: Xianglin Qiu,

Xiaoyang Wang,

Zhen Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Xianglin and Wang, Xiaoyang and Zhang, Zhen and Xiao, Jimin},
  title     = {Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21321--21330}
}
DSO: Aligning 3D Generators with Simulation Feedback for Physical Soundness: Ruining Li,

Chuanxia Zheng,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruining and Zheng, Chuanxia and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{DSO}: Aligning {3D} Generators with Simulation Feedback for Physical Soundness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6772--6783}
}
Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation: Congyi Fan,

Jian Guan,

Xuanjia Zhao,

Dongli Xu,

Youtian Lin,

Tong Ye,

Pengming Feng,

Haiwei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Congyi and Guan, Jian and Zhao, Xuanjia and Xu, Dongli and Lin, Youtian and Ye, Tong and Feng, Pengming and Pan, Haiwei},
  title     = {Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13193--13202}
}
Stable Score Distillation: Haiming Zhu,

Yangyang Xu,

Chenshu Xu,

Tingrui Shen,

Wenxi Liu,

Yong Du,

Jun Yu,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haiming and Xu, Yangyang and Xu, Chenshu and Shen, Tingrui and Liu, Wenxi and Du, Yong and Yu, Jun and He, Shengfeng},
  title     = {Stable Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16597--16606}
}
UAVScenes: A Multi-Modal Dataset for UAVs: Sijie Wang,

Siqi Li,

Yawei Zhang,

Shangshu Yu,

Shenghai Yuan,

Rui She,

Quanjiang Guo,

JinXuan Zheng,

Ong Kang Howe,

Leonrich Chandra,

Shrivarshann Srijeyan,

Aditya Sivadas,

Toshan Aggarwal,

Heyuan Liu,

Hongming Zhang,

Chujie Chen,

Junyu Jiang,

Lihua Xie,

Wee Peng Tay; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sijie and Li, Siqi and Zhang, Yawei and Yu, Shangshu and Yuan, Shenghai and She, Rui and Guo, Quanjiang and Zheng, JinXuan and Howe, Ong Kang and Chandra, Leonrich and Srijeyan, Shrivarshann and Sivadas, Aditya and Aggarwal, Toshan and Liu, Heyuan and Zhang, Hongming and Chen, Chujie and Jiang, Junyu and Xie, Lihua and Tay, Wee Peng},
  title     = {{UAVScenes}: A Multi-Modal Dataset for {UAVs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28946--28958}
}
FoundIR: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration: Hao Li,

Xiang Chen,

Jiangxin Dong,

Jinhui Tang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hao and Chen, Xiang and Dong, Jiangxin and Tang, Jinhui and Pan, Jinshan},
  title     = {{FoundIR}: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12626--12636}
}
LightsOut: Diffusion-based Outpainting for Enhanced Lens Flare Removal: Shr-Ruei Tsai,

Wei-Cheng Chang,

Jie-Ying Lee,

Chih-Hai Su,

Yu-Lun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Shr-Ruei and Chang, Wei-Cheng and Lee, Jie-Ying and Su, Chih-Hai and Liu, Yu-Lun},
  title     = {{LightsOut}: Diffusion-based Outpainting for Enhanced Lens Flare Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6353--6363}
}
TPG-INR: Target Prior-Guided Implicit 3D CT Reconstruction for Enhanced Sparse-view Imaging: Qinglei Cao,

Ziyao Tang,

Xiaoqin Tang; [pdf]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Qinglei and Tang, Ziyao and Tang, Xiaoqin},
  title     = {{TPG-INR}: Target Prior-Guided Implicit {3D} {CT} Reconstruction for Enhanced Sparse-view Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28239--28248}
}
From Holistic to Localized: Local Enhanced Adapters for Efficient Visual Instruction Fine-Tuning: Pengkun Jiao,

Bin Zhu,

Jingjing Chen,

Chong-Wah Ngo,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2025_ICCV,
  author    = {Jiao, Pengkun and Zhu, Bin and Chen, Jingjing and Ngo, Chong-Wah and Jiang, Yu-Gang},
  title     = {From Holistic to Localized: Local Enhanced Adapters for Efficient Visual Instruction Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2728--2737}
}
Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering: Shanlin Sun,

Yifan Wang,

Hanwen Zhang,

Yifeng Xiong,

Qin Ren,

Ruogu Fang,

Xiaohui Xie,

Chenyu You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shanlin and Wang, Yifan and Zhang, Hanwen and Xiong, Yifeng and Ren, Qin and Fang, Ruogu and Xie, Xiaohui and You, Chenyu},
  title     = {Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10386--10397}
}
Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer: Yuan-Fu Yang,

Hsiu-Hui Hsiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuan-Fu and Hsiao, Hsiu-Hui},
  title     = {Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15288--15297}
}
S4M: Boosting Semi-Supervised Instance Segmentation with SAM: Heeji Yoon,

Heeseong Shin,

Eunbeen Hong,

Hyunwook Choi,

Hansang Cho,

Daun Jeong,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2025_ICCV,
  author    = {Yoon, Heeji and Shin, Heeseong and Hong, Eunbeen and Choi, Hyunwook and Cho, Hansang and Jeong, Daun and Kim, Seungryong},
  title     = {{S4M}: Boosting Semi-Supervised Instance Segmentation with {SAM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20226--20236}
}
Debiased Curriculum Adaptation for Safe Transfer Learning in Chest X-ray Classification: Mingyang Liu,

Xinyang Chen,

Yang Shu,

Xiucheng Li,

Weili Guan,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mingyang and Chen, Xinyang and Shu, Yang and Li, Xiucheng and Guan, Weili and Nie, Liqiang},
  title     = {Debiased Curriculum Adaptation for Safe Transfer Learning in Chest {X-ray} Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22610--22619}
}
Diorama: Unleashing Zero-shot Single-view 3D Indoor Scene Modeling: Qirui Wu,

Denys Iliash,

Daniel Ritchie,

Manolis Savva,

Angel X. Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qirui and Iliash, Denys and Ritchie, Daniel and Savva, Manolis and Chang, Angel X.},
  title     = {Diorama: Unleashing Zero-shot Single-view {3D} Indoor Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8896--8907}
}
Boosting Multi-View Indoor 3D Object Detection via Adaptive 3D Volume Construction: Runmin Zhang,

Zhu Yu,

Si-Yuan Cao,

Lingyu Zhu,

Guangyi Zhang,

Xiaokai Bai,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Runmin and Yu, Zhu and Cao, Si-Yuan and Zhu, Lingyu and Zhang, Guangyi and Bai, Xiaokai and Shen, Hui-Liang},
  title     = {Boosting Multi-View Indoor {3D} Object Detection via Adaptive {3D} Volume Construction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5980--5989}
}
VQ-VLA: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers: Yating Wang,

Haoyi Zhu,

Mingyu Liu,

Jiange Yang,

Hao-Shu Fang,

Tong He; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yating and Zhu, Haoyi and Liu, Mingyu and Yang, Jiange and Fang, Hao-Shu and He, Tong},
  title     = {{VQ-VLA}: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11089--11099}
}
Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives: Ziyu Zhang,

Binbin Huang,

Hanqing Jiang,

Liyang Zhou,

Xiaojun Xiang,

Shuhan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ziyu and Huang, Binbin and Jiang, Hanqing and Zhou, Liyang and Xiang, Xiaojun and Shen, Shuhan},
  title     = {Quadratic {Gaussian} Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28260--28270}
}
UDC-VIT: A Real-World Video Dataset for Under-Display Cameras: Kyusu Ahn,

JiSoo Kim,

Sangik Lee,

HyunGyu Lee,

Byeonghyun Ko,

Chanwoo Park,

Jaejin Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ahn_2025_ICCV,
  author    = {Ahn, Kyusu and Kim, JiSoo and Lee, Sangik and Lee, HyunGyu and Ko, Byeonghyun and Park, Chanwoo and Lee, Jaejin},
  title     = {{UDC-VIT}: A Real-World Video Dataset for Under-Display Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10950--10960}
}
Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss: Yuxiao Wang,

Yu Lei,

Zhenao Wei,

Weiying Xue,

Xinyu Jiang,

Nan Zhuang,

Qi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxiao and Lei, Yu and Wei, Zhenao and Xue, Weiying and Jiang, Xinyu and Zhuang, Nan and Liu, Qi},
  title     = {Prompt Guidance and Human Proximal Perception for {HOT} Prediction with Regional Joint Loss},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23636--23645}
}
Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation: Kaining Ying,

Henghui Ding,

Guangquan Jie,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Kaining and Ding, Henghui and Jie, Guangquan and Jiang, Yu-Gang},
  title     = {Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22575--22585}
}
Gait-X: Exploring X modality for Generalized Gait Recognition: Zengbin Wang,

Saihui Hou,

Junjie Li,

Xu Liu,

Chunshui Cao,

Yongzhen Huang,

Siye Wang,

Man Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zengbin and Hou, Saihui and Li, Junjie and Liu, Xu and Cao, Chunshui and Huang, Yongzhen and Wang, Siye and Zhang, Man},
  title     = {{Gait-X}: Exploring {X} modality for Generalized Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13259--13269}
}
Learning Separable Fine-Grained Representation via Dendrogram Construction from Coarse Labels for Fine-grained Visual Recognition: Guanghui Shi,

Xuefeng Liang,

Wenjie Li,

Xiaoyu Lin; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Guanghui and Liang, Xuefeng and Li, Wenjie and Lin, Xiaoyu},
  title     = {Learning Separable Fine-Grained Representation via Dendrogram Construction from Coarse Labels for Fine-grained Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {870--879}
}
DyGS-SLAM: Real-Time Accurate Localization and Gaussian Reconstruction for Dynamic Scenes: Xinggang Hu,

Chenyangguang Zhang,

Mingyuan Zhao,

Yuanze Gui,

Xiangkui Zhang,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xinggang and Zhang, Chenyangguang and Zhao, Mingyuan and Gui, Yuanze and Zhang, Xiangkui and Ji, Xiangyang},
  title     = {{DyGS-SLAM}: Real-Time Accurate Localization and {Gaussian} Reconstruction for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9561--9571}
}
D3QE: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection: Yanran Zhang,

Bingyao Yu,

Yu Zheng,

Wenzhao Zheng,

Yueqi Duan,

Lei Chen,

Jie Zhou,

Jiwen Lu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yanran and Yu, Bingyao and Zheng, Yu and Zheng, Wenzhao and Duan, Yueqi and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {{D3QE}: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16292--16301}
}
Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation: Zhaorui Tan,

Xi Yang,

Tan Pan,

Tianyi Liu,

Chen Jiang,

Xin Guo,

Qiufeng Wang,

Anh Nguyen,

Yuan Qi,

Kaizhu Huang,

Yuan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhaorui and Yang, Xi and Pan, Tan and Liu, Tianyi and Jiang, Chen and Guo, Xin and Wang, Qiufeng and Nguyen, Anh and Qi, Yuan and Huang, Kaizhu and Cheng, Yuan},
  title     = {Towards a Universal {3D} Medical Multi-modality Generalization via Learning Personalized Invariant Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21895--21905}
}
StrandHead: Text to Hair-Disentangled 3D Head Avatars Using Human-Centric Priors: Xiaokun Sun,

Zeyu Cai,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xiaokun and Cai, Zeyu and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
  title     = {{StrandHead}: Text to Hair-Disentangled {3D} Head Avatars Using Human-Centric Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13393--13404}
}
MCAM: Multimodal Causal Analysis Model for Ego-Vehicle-Level Driving Video Understanding: Tongtong Cheng,

Rongzhen Li,

Yixin Xiong,

Tao Zhang,

Jing Wang,

Kai Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Tongtong and Li, Rongzhen and Xiong, Yixin and Zhang, Tao and Wang, Jing and Liu, Kai},
  title     = {{MCAM}: Multimodal Causal Analysis Model for Ego-Vehicle-Level Driving Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5479--5489}
}
Adversarial Robust Memory-Based Continual Learner: Xiaoyue Mi,

Fan Tang,

Zonghan Yang,

Danding Wang,

Juan Cao,

Peng Li,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2025_ICCV,
  author    = {Mi, Xiaoyue and Tang, Fan and Yang, Zonghan and Wang, Danding and Cao, Juan and Li, Peng and Liu, Yang},
  title     = {Adversarial Robust Memory-Based Continual Learner},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {562--572}
}
SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting: Zihui Gao,

Jia-Wang Bian,

Guosheng Lin,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zihui and Bian, Jia-Wang and Lin, Guosheng and Chen, Hao and Shen, Chunhua},
  title     = {{SurfaceSplat}: Connecting Surface Reconstruction and {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28525--28534}
}
TransiT: Transient Transformer for Non-line-of-sight Videography: Ruiqian Li,

Siyuan Shen,

Suan Xia,

Ziheng Wang,

Xingyue Peng,

Chengxuan Song,

Yingsheng Zhu,

Tao Wu,

Shiying Li,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruiqian and Shen, Siyuan and Xia, Suan and Wang, Ziheng and Peng, Xingyue and Song, Chengxuan and Zhu, Yingsheng and Wu, Tao and Li, Shiying and Yu, Jingyi},
  title     = {{TransiT}: Transient Transformer for Non-line-of-sight Videography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27542--27551}
}
Penalizing Boundary Activation for Object Completeness in Diffusion Models: Haoyang Xu,

Tianhao Zhao,

Sibei Yang,

Yutian Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Haoyang and Zhao, Tianhao and Yang, Sibei and Lin, Yutian},
  title     = {Penalizing Boundary Activation for Object Completeness in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14962--14972}
}
DMQ: Dissecting Outliers of Diffusion Models for Post-Training Quantization: Dongyeun Lee,

Jiwan Hur,

Hyounguk Shon,

Jae Young Lee,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Dongyeun and Hur, Jiwan and Shon, Hyounguk and Lee, Jae Young and Kim, Junmo},
  title     = {{DMQ}: Dissecting Outliers of Diffusion Models for Post-Training Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18510--18520}
}
QK-Edit: Revisiting Attention-based Injection in MM-DiT for Image and Video Editing: Tiancheng Shen,

Zilong Huang,

Xiangtai Li,

Zhijie Lin,

Jiyang Liu,

Yitong Wang,

Jiashi Feng,

Ming-Hsuan Yang,

Jun Hao Liew; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Tiancheng and Huang, Zilong and Li, Xiangtai and Lin, Zhijie and Liu, Jiyang and Wang, Yitong and Feng, Jiashi and Yang, Ming-Hsuan and Liew, Jun Hao},
  title     = {{QK-Edit}: Revisiting Attention-based Injection in {MM-DiT} for Image and Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19043--19053}
}
Visual Chronicles: Using Multimodal LLMs to Analyze Massive Collections of Images: Boyang Deng,

Songyou Peng,

Kyle Genova,

Gordon Wetzstein,

Noah Snavely,

Leonidas Guibas,

Thomas Funkhouser; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Boyang and Peng, Songyou and Genova, Kyle and Wetzstein, Gordon and Snavely, Noah and Guibas, Leonidas and Funkhouser, Thomas},
  title     = {Visual Chronicles: Using Multimodal {LLMs} to Analyze Massive Collections of Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12769--12778}
}
SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video: David Stotko,

Reinhard Klein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stotko_2025_ICCV,
  author    = {Stotko, David and Klein, Reinhard},
  title     = {{SAFT}: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27660--27670}
}
Gradient Short-Circuit: Efficient Out-of-Distribution Detection via Feature Intervention: Jiawei Gu,

Ziyue Qiao,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2025_ICCV,
  author    = {Gu, Jiawei and Qiao, Ziyue and Li, Zechao},
  title     = {Gradient Short-Circuit: Efficient Out-of-Distribution Detection via Feature Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {457--466},
}
AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving: Jiawei Xu,

Kai Deng,

Zexin Fan,

Shenlong Wang,

Jin Xie,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiawei and Deng, Kai and Fan, Zexin and Wang, Shenlong and Xie, Jin and Yang, Jian},
  title     = {{AD-GS}: Object-Aware {B-Spline} {Gaussian} Splatting for Self-Supervised Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24770--24779},
}
HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?: Yusen Zhang,

Wenliang Zheng,

Aashrith Madasu,

Peng Shi,

Ryo Kamoi,

Hao Zhou,

Zhuoyang Zou,

Shu Zhao,

Sarkar Snigdha Sarathi Das,

Vipul Gupta,

Xiaoxin Lu,

Nan Zhang,

Ranran Haoran Zhang,

Avitej Iyer,

Renze Lou,

Wenpeng Yin,

Rui Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yusen and Zheng, Wenliang and Madasu, Aashrith and Shi, Peng and Kamoi, Ryo and Zhou, Hao and Zou, Zhuoyang and Zhao, Shu and Das, Sarkar Snigdha Sarathi and Gupta, Vipul and Lu, Xiaoxin and Zhang, Nan and Zhang, Ranran Haoran and Iyer, Avitej and Lou, Renze and Yin, Wenpeng and Zhang, Rui},
  title     = {{HRScene}: How Far Are {VLMs} from Effective High-Resolution Image Understanding?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22922--22933},
}
Decoding Correlation-Induced Misalignment in the Stable Diffusion Workflow for Text-to-Image Generation: Yunze Tong,

Fengda Zhang,

Didi Zhu,

Jun Xiao,

Kun Kuang; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Yunze and Zhang, Fengda and Zhu, Didi and Xiao, Jun and Kuang, Kun},
  title     = {Decoding Correlation-Induced Misalignment in the {Stable Diffusion} Workflow for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18187--18196},
}
CityNav: A Large-Scale Dataset for Real-World Aerial Navigation: Jungdae Lee,

Taiki Miyanishi,

Shuhei Kurita,

Koya Sakamoto,

Daichi Azuma,

Yutaka Matsuo,

Nakamasa Inoue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungdae and Miyanishi, Taiki and Kurita, Shuhei and Sakamoto, Koya and Azuma, Daichi and Matsuo, Yutaka and Inoue, Nakamasa},
  title     = {{CityNav}: A Large-Scale Dataset for Real-World Aerial Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5912--5922},
}
Neuromanifold-Regularized KANs for Shape-fair Feature Representations: Mazlum Ferhat Arslan,

Weihong Guo,

Shuo Li; [pdf] [supp]
[bibtex]
@InProceedings{Arslan_2025_ICCV,
  author    = {Arslan, Mazlum Ferhat and Guo, Weihong and Li, Shuo},
  title     = {Neuromanifold-Regularized {KANs} for Shape-fair Feature Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12790--12799},
}
Diffusion Guided Adaptive Augmentation for Generalization in Visual Reinforcement Learning: Jeong Woon Lee,

Hyoseok Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jeong Woon and Hwang, Hyoseok},
  title     = {Diffusion Guided Adaptive Augmentation for Generalization in Visual Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {880--889},
}
Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction: Yuntao Shou,

Xiangyong Cao,

Peiqiang Yan,

Qiao Hui,

Qian Zhao,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yuntao and Cao, Xiangyong and Yan, Peiqiang and Hui, Qiao and Zhao, Qian and Meng, Deyu},
  title     = {Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19925--19935},
}
Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation: Yukuan Min,

Muli Yang,

Jinhao Zhang,

Yuxuan Wang,

Aming Wu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Min_2025_ICCV,
  author    = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Wu, Aming and Deng, Cheng},
  title     = {Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16755--16764},
}
FedAGC: Federated Continual Learning with Asymmetric Gradient Correction: Chengchao Zhang,

Fanhua Shang,

Hongying Liu,

Liang Wan,

Wei Feng; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Chengchao and Shang, Fanhua and Liu, Hongying and Wan, Liang and Feng, Wei},
  title     = {{FedAGC}: Federated Continual Learning with Asymmetric Gradient Correction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3841--3850},
}
Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging: Ruangrawee Kitichotkul,

Shashwath Bharadwaj,

Joshua Rapp,

Yanting Ma,

Alexander Mehta,

Vivek K Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kitichotkul_2025_ICCV,
  author    = {Kitichotkul, Ruangrawee and Bharadwaj, Shashwath and Rapp, Joshua and Ma, Yanting and Mehta, Alexander and Goyal, Vivek K},
  title     = {Free-running vs Synchronous: Single-Photon Lidar for High-flux {3D} Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25972--25982},
}
Continuous-Time Human Motion Field from Event Cameras: Ziyun Wang,

Ruijun Zhang,

Zi-Yan Liu,

Yufu Wang,

Kostas Daniilidis; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyun and Zhang, Ruijun and Liu, Zi-Yan and Wang, Yufu and Daniilidis, Kostas},
  title     = {Continuous-Time Human Motion Field from Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11502--11512},
}
MagicMirror: ID-Preserved Video Generation in Video Diffusion Transformers: Yuechen Zhang,

Yaoyang Liu,

Bin Xia,

Bohao Peng,

Zexin Yan,

Eric Lo,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuechen and Liu, Yaoyang and Xia, Bin and Peng, Bohao and Yan, Zexin and Lo, Eric and Jia, Jiaya},
  title     = {{MagicMirror}: {ID}-Preserved Video Generation in Video Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14464--14474},
}
Unified Category-Level Object Detection and Pose Estimation from RGB Images using 3D Prototypes: Tom Fischer,

Xiaojie Zhang,

Eddy Ilg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fischer_2025_ICCV,
  author    = {Fischer, Tom and Zhang, Xiaojie and Ilg, Eddy},
  title     = {Unified Category-Level Object Detection and Pose Estimation from {RGB} Images using {3D} Prototypes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9790--9800},
}
GT-Mean Loss: A Simple Yet Effective Solution for Brightness Mismatch in Low-Light Image Enhancement: Jingxi Liao,

Shijie Hao,

Richang Hong,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Jingxi and Hao, Shijie and Hong, Richang and Wang, Meng},
  title     = {{GT-Mean} Loss: A Simple Yet Effective Solution for Brightness Mismatch in Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6112--6121},
}
OpenVision: A Fully-Open, Cost-Effective Family of Advanced Vision Encoders for Multimodal Learning: Xianhang Li,

Yanqing Liu,

Haoqin Tu,

Cihang Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xianhang and Liu, Yanqing and Tu, Haoqin and Xie, Cihang},
  title     = {{OpenVision}: A Fully-Open, Cost-Effective Family of Advanced Vision Encoders for Multimodal Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3977--3987},
}
Unified Multimodal Understanding via Byte-Pair Visual Encoding: Wanpeng Zhang,

Yicheng Feng,

Hao Luo,

Yijiang Li,

Zihao Yue,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wanpeng and Feng, Yicheng and Luo, Hao and Li, Yijiang and Yue, Zihao and Zheng, Sipeng and Lu, Zongqing},
  title     = {Unified Multimodal Understanding via Byte-Pair Visual Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12976--12986},
}
MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices: Hailong Yan,

Ao Li,

Xiangtao Zhang,

Zhe Liu,

Zenglin Shi,

Ce Zhu,

Le Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Hailong and Li, Ao and Zhang, Xiangtao and Liu, Zhe and Shi, Zenglin and Zhu, Ce and Zhang, Le},
  title     = {{MobileIE}: An Extremely Lightweight and Effective {ConvNet} for Real-Time Image Enhancement on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21949--21960},
}
Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement: Junyu Lou,

Xiaorui Zhao,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Junyu and Zhao, Xiaorui and Shi, Kexuan and Gu, Shuhang},
  title     = {Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14095--14105},
}
ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training: Leonard Bruns,

Axel Barroso-Laguna,

Tommaso Cavallari,

Aron Monszpart,

Sowmya Munukutla,

Victor Adrian Prisacariu,

Eric Brachmann; [pdf] [supp]
[bibtex]
@InProceedings{Bruns_2025_ICCV,
  author    = {Bruns, Leonard and Barroso-Laguna, Axel and Cavallari, Tommaso and Monszpart, Aron and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric},
  title     = {{ACE-G}: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26751--26761},
}
Everything is a Video: Unifying Modalities through Next-Frame Prediction: G. Thomas Hudson,

Dean Slack,

Thomas Winterbottom,

Jamie Sterling,

Chenghao Xiao,

Junjie Shentu,

Noura Al Moubayed; [pdf] [arXiv]
[bibtex]
@InProceedings{Hudson_2025_ICCV,
  author    = {Hudson, G. Thomas and Slack, Dean and Winterbottom, Thomas and Sterling, Jamie and Xiao, Chenghao and Shentu, Junjie and Al Moubayed, Noura},
  title     = {Everything is a Video: Unifying Modalities through Next-Frame Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22004--22013},
}
ViT-EnsembleAttack: Augmenting Ensemble Models for Stronger Adversarial Transferability in Vision Transformers: Hanwen Cao,

Haobo Lu,

Xiaosen Wang,

Kun He; [pdf]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Hanwen and Lu, Haobo and Wang, Xiaosen and He, Kun},
  title     = {{ViT-EnsembleAttack}: Augmenting Ensemble Models for Stronger Adversarial Transferability in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2000--2009},
}
LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer: Yiren Song,

Danze Chen,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yiren and Chen, Danze and Shou, Mike Zheng},
  title     = {{LayerTracer}: Cognitive-Aligned Layered {SVG} Synthesis via Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19731--19741},
}
TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation: Jiale Zhou,

Wenhan Wang,

Shikun Li,

Xiaolei Qu,

Xin Guo,

Yizhong Liu,

Wenzhong Tang,

Xun Lin,

Yefeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Jiale and Wang, Wenhan and Li, Shikun and Qu, Xiaolei and Guo, Xin and Liu, Yizhong and Tang, Wenzhong and Lin, Xun and Zheng, Yefeng},
  title     = {{TopoTTA}: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24123--24134},
}
Counting Stacked Objects: Corentin Dumery,

Noa Etté,

Aoxiang Fan,

Ren Li,

Jingyi Xu,

Hieu Le,

Pascal Fua; [pdf] [supp]
[bibtex]
@InProceedings{Dumery_2025_ICCV,
  author    = {Dumery, Corentin and Ett{\'e}, Noa and Fan, Aoxiang and Li, Ren and Xu, Jingyi and Le, Hieu and Fua, Pascal},
  title     = {Counting Stacked Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19774--19783},
}
VOccl3D: A Video Benchmark Dataset for 3D Human Pose and Shape Estimation under real Occlusions: Yash Garg,

Saketh Bachu,

Arindam Dutta,

Rohit Lal,

Sarosij Bose,

Calvin-Khang Ta,

M. Salman Asif,

Amit Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2025_ICCV,
  author    = {Garg, Yash and Bachu, Saketh and Dutta, Arindam and Lal, Rohit and Bose, Sarosij and Ta, Calvin-Khang and Asif, M. Salman and Roy-Chowdhury, Amit},
  title     = {{VOccl3D}: A Video Benchmark Dataset for {3D} Human Pose and Shape Estimation under real Occlusions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7350--7360},
}
PoseAnchor: Robust Root Position Estimation for 3D Human Pose Estimation: Jun-Hee Kim,

Jumin Han,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jun-Hee and Han, Jumin and Lee, Seong-Whan},
  title     = {{PoseAnchor}: Robust Root Position Estimation for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7079--7088},
}
Sparfels: Fast Reconstruction from Sparse Unposed Imagery: Shubhendu Jena,

Amine Ouasfi,

Mae Younes,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jena_2025_ICCV,
  author    = {Jena, Shubhendu and Ouasfi, Amine and Younes, Mae and Boukhayma, Adnane},
  title     = {{Sparfels}: Fast Reconstruction from Sparse Unposed Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27476--27487},
}
Unsupervised Visual Chain-of-Thought Reasoning via Preference Optimization: Kesen Zhao,

Beier Zhu,

Qianru Sun,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Kesen and Zhu, Beier and Sun, Qianru and Zhang, Hanwang},
  title     = {Unsupervised Visual Chain-of-Thought Reasoning via Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2303--2312},
}
On the Complexity-Faithfulness Trade-off of Gradient-Based Explanations: Amir Mehrpanah,

Matteo Gamba,

Kevin Smith,

Hossein Azizpour; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehrpanah_2025_ICCV,
  author    = {Mehrpanah, Amir and Gamba, Matteo and Smith, Kevin and Azizpour, Hossein},
  title     = {On the Complexity-Faithfulness Trade-off of Gradient-Based Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3531--3541},
}
Adaptive Dual Uncertainty Optimization: Boosting Monocular 3D Object Detection under Test-Time Shifts: Zixuan Hu,

Dongxiao Li,

Xinzhu Ma,

Shixiang Tang,

Xiaotong Li,

Wenhan Yang,

Ling-Yu Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Zixuan and Li, Dongxiao and Ma, Xinzhu and Tang, Shixiang and Li, Xiaotong and Yang, Wenhan and Duan, Ling-Yu},
  title     = {Adaptive Dual Uncertainty Optimization: Boosting Monocular {3D} Object Detection under Test-Time Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7273--7283},
}
RayPose: Ray Bundling Diffusion for Template Views in Unseen 6D Object Pose Estimation: Junwen Huang,

Shishir Reddy Vutukur,

Peter KT Yu,

Nassir Navab,

Slobodan Ilic,

Benjamin Busam; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junwen and Vutukur, Shishir Reddy and Yu, Peter KT and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin},
  title     = {{RayPose}: Ray Bundling Diffusion for Template Views in Unseen {6D} Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9102--9112},
}
DuoLoRA : Cycle-consistent and Rank-disentangled Content-Style Personalization: Aniket Roy,

Shubhankar Borse,

Shreya Kadambi,

Debasmit Das,

Shweta Mahajan,

Risheek Garrepalli,

Hyojin Park,

Ankita Nayak,

Rama Chellappa,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roy_2025_ICCV,
  author    = {Roy, Aniket and Borse, Shubhankar and Kadambi, Shreya and Das, Debasmit and Mahajan, Shweta and Garrepalli, Risheek and Park, Hyojin and Nayak, Ankita and Chellappa, Rama and Hayat, Munawar and Porikli, Fatih},
  title     = {{DuoLoRA} : Cycle-consistent and Rank-disentangled Content-Style Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15395--15404},
}
F-Bench: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration: Lu Liu,

Huiyu Duan,

Qiang Hu,

Liu Yang,

Chunlei Cai,

Tianxiao Ye,

Huayu Liu,

Xiaoyun Zhang,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lu and Duan, Huiyu and Hu, Qiang and Yang, Liu and Cai, Chunlei and Ye, Tianxiao and Liu, Huayu and Zhang, Xiaoyun and Zhai, Guangtao},
  title     = {{F-Bench}: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10982--10994},
}
MikuDance: Animating Character Art with Mixed Motion Dynamics: Jiaxu Zhang,

Xianfang Zeng,

Xin Chen,

Wei Zuo,

Gang Yu,

Zhigang Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiaxu and Zeng, Xianfang and Chen, Xin and Zuo, Wei and Yu, Gang and Tu, Zhigang},
  title     = {{MikuDance}: Animating Character Art with Mixed Motion Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19689--19699},
}
GaussRender: Learning 3D Occupancy with Gaussian Rendering: Loick Chambon,

Eloi Zablocki,

Alexandre Boulch,

Mickael Chen,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chambon_2025_ICCV,
  author    = {Chambon, Loick and Zablocki, Eloi and Boulch, Alexandre and Chen, Mickael and Cord, Matthieu},
  title     = {{GaussRender}: Learning {3D} Occupancy with {Gaussian} Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27010--27020},
}
UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation: Zhengyin Liang,

Hui Yin,

Min Liang,

Qianqian Du,

Ying Yang,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Zhengyin and Yin, Hui and Liang, Min and Du, Qianqian and Yang, Ying and Huang, Hua},
  title     = {{UniDxMD}: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20346--20356},
}
GFPack++: Attention-Driven Gradient Fields for Optimizing 2D Irregular Packing: Tianyang Xue,

Lin Lu,

Yang Liu,

Mingdong Wu,

Hao Dong,

Yanbin Zhang,

Renmin Han,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Tianyang and Lu, Lin and Liu, Yang and Wu, Mingdong and Dong, Hao and Zhang, Yanbin and Han, Renmin and Chen, Baoquan},
  title     = {{GFPack++}: Attention-Driven Gradient Fields for Optimizing {2D} Irregular Packing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18014--18023},
}
PLA: Prompt Learning Attack against Text-to-Image Generative Models: Xinqi Lyu,

Yihao Liu,

Yanjie Li,

Bin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Xinqi and Liu, Yihao and Li, Yanjie and Xiao, Bin},
  title     = {{PLA}: Prompt Learning Attack against Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16851--16860},
}
HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis: Timo Teufel,

Pulkit Gera,

Xilong Zhou,

Umar Iqbal,

Pramod Rao,

Jan Kautz,

Vladislav Golyanik,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teufel_2025_ICCV,
  author    = {Teufel, Timo and Gera, Pulkit and Zhou, Xilong and Iqbal, Umar and Rao, Pramod and Kautz, Jan and Golyanik, Vladislav and Theobalt, Christian},
  title     = {{HumanOLAT}: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29131--29141},
}
Integrating Task-Specific and Universal Adapters for Pre-Trained Model-based Class-Incremental Learning: Yan Wang,

Da-Wei Zhou,

Han-Jia Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yan and Zhou, Da-Wei and Ye, Han-Jia},
  title     = {Integrating Task-Specific and Universal Adapters for Pre-Trained Model-based Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {806--816},
}
RoboFactory: Exploring Embodied Agent Collaboration with Compositional Constraints: Yiran Qin,

Li Kang,

Xiufeng Song,

Zhenfei Yin,

Xiaohong Liu,

Xihui Liu,

Ruimao Zhang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yiran and Kang, Li and Song, Xiufeng and Yin, Zhenfei and Liu, Xiaohong and Liu, Xihui and Zhang, Ruimao and Bai, Lei},
  title     = {{RoboFactory}: Exploring Embodied Agent Collaboration with Compositional Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10075--10085},
}
Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction: Jixuan Fan,

Wanhua Li,

Yifei Han,

Tianru Dai,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Jixuan and Li, Wanhua and Han, Yifei and Dai, Tianru and Tang, Yansong},
  title     = {{Momentum-GS}: Momentum {Gaussian} Self-Distillation for High-Quality Large Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25250--25260},
}
SEHDR: Single-Exposure HDR Novel View Synthesis via 3D Gaussian Bracketing: Yiyu Li,

Haoyuan Wang,

Ke Xu,

Gerhard Petrus Hancke,

Rynson W.H. Lau; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yiyu and Wang, Haoyuan and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {{SEHDR}: Single-Exposure {HDR} Novel View Synthesis via {3D} {Gaussian} Bracketing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26045--26054},
}
TriDi: Trilateral Diffusion of 3D Humans, Objects, and Interactions: Ilya A. Petrov,

Riccardo Marin,

Julian Chibane,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Petrov_2025_ICCV,
  author    = {Petrov, Ilya A. and Marin, Riccardo and Chibane, Julian and Pons-Moll, Gerard},
  title     = {{TriDi}: Trilateral Diffusion of {3D} Humans, Objects, and Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5523--5535},
}
GEOBench-VLM: Benchmarking Vision-Language Models for Geospatial Tasks: Muhammad Danish,

Muhammad Akhtar Munir,

Syed Roshaan Ali Shah,

Kartik Kuckreja,

Fahad Shahbaz Khan,

Paolo Fraccaro,

Alexandre Lacoste,

Salman Khan; [pdf] [supp]
[bibtex]
@InProceedings{Danish_2025_ICCV,
  author    = {Danish, Muhammad and Munir, Muhammad Akhtar and Shah, Syed Roshaan Ali and Kuckreja, Kartik and Khan, Fahad Shahbaz and Fraccaro, Paolo and Lacoste, Alexandre and Khan, Salman},
  title     = {{GEOBench-VLM}: Benchmarking Vision-Language Models for Geospatial Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7132--7142},
}
UPP: Unified Point-Level Prompting for Robust Point Cloud Analysis: Zixiang Ai,

Zhenyu Cui,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ai_2025_ICCV,
  author    = {Ai, Zixiang and Cui, Zhenyu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{UPP}: Unified Point-Level Prompting for Robust Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27359--27368},
}
HypDAE: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation: Lingxiao Li,

Kaixuan Fan,

Boqing Gong,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Lingxiao and Fan, Kaixuan and Gong, Boqing and Yue, Xiangyu},
  title     = {{HypDAE}: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17119--17128},
}
High-Resolution Spatiotemporal Modeling with Global-Local State Space Models for Video-Based Human Pose Estimation: Runyang Feng,

Hyung Jin Chang,

Tze Ho Elden Tse,

Boeun Kim,

Yi Chang,

Yixing Gao; [pdf] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Runyang and Chang, Hyung Jin and Tse, Tze Ho Elden and Kim, Boeun and Chang, Yi and Gao, Yixing},
  title     = {High-Resolution Spatiotemporal Modeling with Global-Local State Space Models for Video-Based Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8929--8938},
}
ShadowHack: Hacking Shadows via Luminance-Color Divide and Conquer: Jin Hu,

Mingjia Li,

Xiaojie Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Jin and Li, Mingjia and Guo, Xiaojie},
  title     = {{ShadowHack}: Hacking Shadows via Luminance-Color Divide and Conquer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11403--11413},
}
REPA-E: Unlocking VAE for End-to-End Tuning of Latent Diffusion Transformers: Xingjian Leng,

Jaskirat Singh,

Yunzhong Hou,

Zhenchang Xing,

Saining Xie,

Liang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Xingjian and Singh, Jaskirat and Hou, Yunzhong and Xing, Zhenchang and Xie, Saining and Zheng, Liang},
  title     = {{REPA-E}: Unlocking {VAE} for End-to-End Tuning of Latent Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18262--18272},
}
MOSAIC: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments: Zhixuan Liu,

Haokun Zhu,

Rui Chen,

Jonathan Francis,

Soonmin Hwang,

Ji Zhang,

Jean Oh; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zhixuan and Zhu, Haokun and Chen, Rui and Francis, Jonathan and Hwang, Soonmin and Zhang, Ji and Oh, Jean},
  title     = {{MOSAIC}: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27456--27465},
}
RoBridge: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation: Kaidong Zhang,

Rongtao Xu,

Pengzhen Ren,

Junfan Lin,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kaidong and Xu, Rongtao and Ren, Pengzhen and Lin, Junfan and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoBridge}: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14590--14601},
}
PersonaCraft: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware 3D-Conditioned Diffusion: Gwanghyun Kim,

Suh Yoon Jeon,

Seunggyu Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Gwanghyun and Jeon, Suh Yoon and Lee, Seunggyu and Chun, Se Young},
  title     = {{PersonaCraft}: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware {3D}-Conditioned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12034--12044},
}
Controllable and Expressive One-Shot Video Head Swapping: Chaonan Ji,

Jinwei Qi,

Peng Zhang,

Bang Zhang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Chaonan and Qi, Jinwei and Zhang, Peng and Zhang, Bang and Bo, Liefeng},
  title     = {Controllable and Expressive One-Shot Video Head Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10239--10250},
}
SkySense V2: A Unified Foundation Model for Multi-modal Remote Sensing: Yingying Zhang,

Lixiang Ru,

Kang Wu,

Lei Yu,

Lei Liang,

Yansheng Li,

Jingdong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yingying and Ru, Lixiang and Wu, Kang and Yu, Lei and Liang, Lei and Li, Yansheng and Chen, Jingdong},
  title     = {{SkySense V2}: A Unified Foundation Model for Multi-modal Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9136--9146}
}
Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding: Yuanhan Zhang,

Yunice Chew,

Yuhao Dong,

Aria Leo,

Bo Hu,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuanhan and Chew, Yunice and Dong, Yuhao and Leo, Aria and Hu, Bo and Liu, Ziwei},
  title     = {Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20626--20636}
}
Intra-view and Inter-view Correlation Guided Multi-view Novel Class Discovery: Xinhang Wan,

Jiyuan Liu,

Qian Qu,

Suyuan Liu,

Chuyu Zhang,

Fangdi Wang,

Xinwang Liu,

En Zhu,

Kunlun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Xinhang and Liu, Jiyuan and Qu, Qian and Liu, Suyuan and Zhang, Chuyu and Wang, Fangdi and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Intra-view and Inter-view Correlation Guided Multi-view Novel Class Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4114--4124}
}
Decoupled Diffusion Sparks Adaptive Scene Generation: Yunsong Zhou,

Naisheng Ye,

William Ljungbergh,

Tianyu Li,

Jiazhi Yang,

Zetong Yang,

Hongzi Zhu,

Christoffer Petersson,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yunsong and Ye, Naisheng and Ljungbergh, William and Li, Tianyu and Yang, Jiazhi and Yang, Zetong and Zhu, Hongzi and Petersson, Christoffer and Li, Hongyang},
  title     = {Decoupled Diffusion Sparks Adaptive Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27760--27770}
}
Unsupervised Joint Learning of Optical Flow and Intensity with Event Cameras: Shuang Guo,

Friedhelm Hamann,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Shuang and Hamann, Friedhelm and Gallego, Guillermo},
  title     = {Unsupervised Joint Learning of Optical Flow and Intensity with Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7980--7989}
}
Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization: Ashutosh Anshul,

Shreyas Gopal,

Deepu Rajan,

Eng Siong Chng; [pdf] [supp]
[bibtex]
@InProceedings{Anshul_2025_ICCV,
  author    = {Anshul, Ashutosh and Gopal, Shreyas and Rajan, Deepu and Chng, Eng Siong},
  title     = {Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13826--13836}
}
YOLO-Count: Differentiable Object Counting for Text-to-Image Generation: Guanning Zeng,

Xiang Zhang,

Zirui Wang,

Haiyang Xu,

Zeyuan Chen,

Bingnan Li,

Zhuowen Tu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Guanning and Zhang, Xiang and Wang, Zirui and Xu, Haiyang and Chen, Zeyuan and Li, Bingnan and Tu, Zhuowen},
  title     = {{YOLO-Count}: Differentiable Object Counting for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16765--16775}
}
PriorMotion: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors: Kangan Qian,

Jinyu Miao,

Xinyu Jiao,

Ziang Luo,

Zheng Fu,

Yining Shi,

Yunlong Wang,

Kun Jiang,

Diange Yang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Kangan and Miao, Jinyu and Jiao, Xinyu and Luo, Ziang and Fu, Zheng and Shi, Yining and Wang, Yunlong and Jiang, Kun and Yang, Diange},
  title     = {{PriorMotion}: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27284--27294}
}
Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data: Qi Chen,

Xinze Zhou,

Chen Liu,

Hao Chen,

Wenxuan Li,

Zekun Jiang,

Ziyan Huang,

Yuxuan Zhao,

Dexin Yu,

Junjun He,

Yefeng Zheng,

Ling Shao,

Alan Yuille,

Zongwei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Qi and Zhou, Xinze and Liu, Chen and Chen, Hao and Li, Wenxuan and Jiang, Zekun and Huang, Ziyan and Zhao, Yuxuan and Yu, Dexin and He, Junjun and Zheng, Yefeng and Shao, Ling and Yuille, Alan and Zhou, Zongwei},
  title     = {Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24001--24013}
}
DCT-Shield: A Robust Frequency Domain Defense against Malicious Image Editing: Aniruddha Bala,

Rohit Chowdhury,

Rohan Jaiswal,

Siddharth Roheda; [pdf] [supp]
[bibtex]
@InProceedings{Bala_2025_ICCV,
  author    = {Bala, Aniruddha and Chowdhury, Rohit and Jaiswal, Rohan and Roheda, Siddharth},
  title     = {{DCT-Shield}: A Robust Frequency Domain Defense against Malicious Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18876--18884}
}
MobileViCLIP: An Efficient Video-Text Model for Mobile Devices: Min Yang,

Zihan Jia,

Zhilin Dai,

Sheng Guo,

Limin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Min and Jia, Zihan and Dai, Zhilin and Guo, Sheng and Wang, Limin},
  title     = {{MobileViCLIP}: An Efficient Video-Text Model for Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20824--20835}
}
SignRep: Enhancing Self-Supervised Sign Representations: Ryan Wong,

Necati Cihan Camgoz,

Richard Bowden; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Ryan and Camgoz, Necati Cihan and Bowden, Richard},
  title     = {{SignRep}: Enhancing Self-Supervised Sign Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22804--22814}
}
Towards Comprehensive Lecture Slides Understanding: Large-scale Dataset and Effective Method: Enming Zhang,

Yuzhe Li,

Yuliang Liu,

Yingying Zhu,

Xiang Bai; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Enming and Li, Yuzhe and Liu, Yuliang and Zhu, Yingying and Bai, Xiang},
  title     = {Towards Comprehensive Lecture Slides Understanding: Large-scale Dataset and Effective Method},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4455--4464}
}
Visual Relation Diffusion for Human-Object Interaction Detection: Ping Cao,

Yepeng Tang,

Chunjie Zhang,

Xiaolong Zheng,

Chao Liang,

Yunchao Wei,

Yao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Ping and Tang, Yepeng and Zhang, Chunjie and Zheng, Xiaolong and Liang, Chao and Wei, Yunchao and Zhao, Yao},
  title     = {Visual Relation Diffusion for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23551--23560}
}
Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance: Mingfang Zhang,

Ryo Yonetani,

Yifei Huang,

Liangyang Ouyang,

Ruicong Liu,

Yoichi Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Mingfang and Yonetani, Ryo and Huang, Yifei and Ouyang, Liangyang and Liu, Ruicong and Sato, Yoichi},
  title     = {Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27209--27219}
}
Hybrid-TTA: Continual Test-time Adaptation via Dynamic Domain Shift Detection: Hyewon Park,

Hyejin Park,

Jueun Ko,

Dongbo Min; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Hyewon and Park, Hyejin and Ko, Jueun and Min, Dongbo},
  title     = {{Hybrid-TTA}: Continual Test-time Adaptation via Dynamic Domain Shift Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2877--2886}
}
NavQ: Learning a Q-Model for Foresighted Vision-and-Language Navigation: Peiran Xu,

Xicheng Gong,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Peiran and Gong, Xicheng and Mu, Yadong},
  title     = {{NavQ}: Learning a {Q}-Model for Foresighted Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6327--6341}
}
PartField: Learning 3D Feature Fields for Part Segmentation and Beyond: Minghua Liu,

Mikaela Angelina Uy,

Donglai Xiang,

Hao Su,

Sanja Fidler,

Nicholas Sharp,

Jun Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Minghua and Uy, Mikaela Angelina and Xiang, Donglai and Su, Hao and Fidler, Sanja and Sharp, Nicholas and Gao, Jun},
  title     = {{PartField}: Learning {3D} Feature Fields for Part Segmentation and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9704--9715}
}
Anti-Tamper Protection for Unauthorized Individual Image Generation: Zelin Li,

Ruohan Zong,

Yifan Liu,

Ruichen Yao,

Yaokun Liu,

Yang Zhang,

Dong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zelin and Zong, Ruohan and Liu, Yifan and Yao, Ruichen and Liu, Yaokun and Zhang, Yang and Wang, Dong},
  title     = {Anti-Tamper Protection for Unauthorized Individual Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15501--15510}
}
EgoAdapt: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception: Sanjoy Chowdhury,

Subrata Biswas,

Sayan Nag,

Tushar Nagarajan,

Calvin Murdock,

Ishwarya Ananthabhotla,

Yijun Qian,

Vamsi Krishna Ithapu,

Dinesh Manocha,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Biswas, Subrata and Nag, Sayan and Nagarajan, Tushar and Murdock, Calvin and Ananthabhotla, Ishwarya and Qian, Yijun and Ithapu, Vamsi Krishna and Manocha, Dinesh and Gao, Ruohan},
  title     = {{EgoAdapt}: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10741--10752}
}
EVT: Efficient View Transformation for Multi-Modal 3D Object Detection: Yongjin Lee,

Hyeon-Mun Jeong,

Yurim Jeon,

Sanghyun Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Yongjin and Jeong, Hyeon-Mun and Jeon, Yurim and Kim, Sanghyun},
  title     = {{EVT}: Efficient View Transformation for Multi-Modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26632--26642}
}
Mitigating Object Hallucinations via Sentence-Level Early Intervention: Shangpin Peng,

Senqiao Yang,

Li Jiang,

Zhuotao Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Shangpin and Yang, Senqiao and Jiang, Li and Tian, Zhuotao},
  title     = {Mitigating Object Hallucinations via Sentence-Level Early Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {635--646}
}
RobAVA: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding: Baoli Sun,

Ning Wang,

Xinzhu Ma,

Anqi Zou,

Yihang Lu,

Chuixuan Fan,

Zhihui Wang,

Kun Lu,

Zhiyong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Baoli and Wang, Ning and Ma, Xinzhu and Zou, Anqi and Lu, Yihang and Fan, Chuixuan and Wang, Zhihui and Lu, Kun and Wang, Zhiyong},
  title     = {{RobAVA}: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13985--13994}
}
DisenQ: Disentangling Q-Former for Activity-Biometrics: Shehreen Azad,

Yogesh Singh Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Azad_2025_ICCV,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {{DisenQ}: Disentangling {Q-Former} for Activity-Biometrics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13502--13512}
}
Vision-Language Neural Graph Featurization for Extracting Retinal Lesions: Taimur Hassan,

Anabia Sohail,

Muzammal Naseer,

Naoufel Werghi; [pdf]
[bibtex]
@InProceedings{Hassan_2025_ICCV,
  author    = {Hassan, Taimur and Sohail, Anabia and Naseer, Muzammal and Werghi, Naoufel},
  title     = {Vision-Language Neural Graph Featurization for Extracting Retinal Lesions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23700--23709}
}
PhysSplat: Efficient Physics Simulation for 3D Scenes via MLLM-Guided Gaussian Splatting: Haoyu Zhao,

Hao Wang,

Xingyue Zhao,

Hao Fei,

Hongqiu Wang,

Chengjiang Long,

Hua Zou; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Haoyu and Wang, Hao and Zhao, Xingyue and Fei, Hao and Wang, Hongqiu and Long, Chengjiang and Zou, Hua},
  title     = {{PhysSplat}: Efficient Physics Simulation for {3D} Scenes via {MLLM}-Guided {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5242--5252}
}
Zero-Shot Depth Aware Image Editing with Diffusion Models: Rishubh Parihar,

Sachidanand VS,

R. Venkatesh Babu; [pdf] [supp]
[bibtex]
@InProceedings{Parihar_2025_ICCV,
  author    = {Parihar, Rishubh and VS, Sachidanand and Babu, R. Venkatesh},
  title     = {Zero-Shot Depth Aware Image Editing with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15748--15759}
}
OrderChain: Towards General Instruct-Tuning for Stimulating the Ordinal Understanding Ability of MLLM: Jinhong Wang,

Shuo Tong,

Jian Liu,

Dongqi Tang,

Weiqiang Wang,

Wentong Li,

Hongxia Xu,

Danny Z. Chen,

Jintai Chen,

Jian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jinhong and Tong, Shuo and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Xu, Hongxia and Chen, Danny Z. and Chen, Jintai and Wu, Jian},
  title     = {{OrderChain}: Towards General Instruct-Tuning for Stimulating the Ordinal Understanding Ability of {MLLM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3477--3487}
}
V2XPnP: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction: Zewei Zhou,

Hao Xiang,

Zhaoliang Zheng,

Seth Z. Zhao,

Mingyue Lei,

Yun Zhang,

Tianhui Cai,

Xinyi Liu,

Johnson Liu,

Maheswari Bajji,

Xin Xia,

Zhiyu Huang,

Bolei Zhou,

Jiaqi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zewei and Xiang, Hao and Zheng, Zhaoliang and Zhao, Seth Z. and Lei, Mingyue and Zhang, Yun and Cai, Tianhui and Liu, Xinyi and Liu, Johnson and Bajji, Maheswari and Xia, Xin and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi},
  title     = {{V2XPnP}: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25399--25409}
}
Scaling Action Detection: AdaTAD++ with Transformer-Enhanced Temporal-Spatial Adaptation: Tanay Agrawal,

Abid Ali,

Antitza Dantcheva,

Francois Bremond; [pdf]
[bibtex]
@InProceedings{Agrawal_2025_ICCV,
  author    = {Agrawal, Tanay and Ali, Abid and Dantcheva, Antitza and Bremond, Francois},
  title     = {Scaling Action Detection: {AdaTAD++} with Transformer-Enhanced Temporal-Spatial Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12222--12231}
}
Fix-CLIP: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text: Bingchao Wang,

Zhiwei Ning,

Jianyu Ding,

Xuanang Gao,

Yin Li,

Dongsheng Jiang,

Jie Yang,

Wei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bingchao and Ning, Zhiwei and Ding, Jianyu and Gao, Xuanang and Li, Yin and Jiang, Dongsheng and Yang, Jie and Liu, Wei},
  title     = {{Fix-CLIP}: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20694--20704}
}
MotionCtrl: A Real-time Controllable Vision-Language-Motion Model: Bin Cao,

Sipeng Zheng,

Ye Wang,

Lujie Xia,

Qianshan Wei,

Qin Jin,

Jing Liu,

Zongqing Lu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Bin and Zheng, Sipeng and Wang, Ye and Xia, Lujie and Wei, Qianshan and Jin, Qin and Liu, Jing and Lu, Zongqing},
  title     = {{MotionCtrl}: A Real-time Controllable Vision-Language-Motion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12253--12262}
}
Benchmarking Multimodal Large Language Models Against Image Corruptions: Xinkuan Qiu,

Meina Kan,

Yongbin Zhou,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Xinkuan and Kan, Meina and Zhou, Yongbin and Shan, Shiguang},
  title     = {Benchmarking Multimodal Large Language Models Against Image Corruptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9014--9023}
}
ETVA: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering: Kaisi Guan,

Zhengfeng Lai,

Yuchong Sun,

Peng Zhang,

Wei Liu,

Kieran Liu,

Meng Cao,

Ruihua Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Kaisi and Lai, Zhengfeng and Sun, Yuchong and Zhang, Peng and Liu, Wei and Liu, Kieran and Cao, Meng and Song, Ruihua},
  title     = {{ETVA}: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21299--21309}
}
Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model: Zewei Xin,

Qinya Li,

Chaoyue Niu,

Fan Wu,

Guihai Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2025_ICCV,
  author    = {Xin, Zewei and Li, Qinya and Niu, Chaoyue and Wu, Fan and Chen, Guihai},
  title     = {Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19482--19491}
}
Outdoor Monocular SLAM with Global Scale-Consistent 3D Gaussian Pointmaps: Chong Cheng,

Sicheng Yu,

Zijian Wang,

Yifan Zhou,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Chong and Yu, Sicheng and Wang, Zijian and Zhou, Yifan and Wang, Hao},
  title     = {Outdoor Monocular {SLAM} with Global Scale-Consistent {3D} {Gaussian} Pointmaps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26035--26044}
}
FlowStyler: Artistic Video Stylization via Transformation Fields Transports: Yuning Gong,

Jiaming Chen,

Xiaohua Ren,

Yuanjun Liao,

Yanci Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Yuning and Chen, Jiaming and Ren, Xiaohua and Liao, Yuanjun and Zhang, Yanci},
  title     = {{FlowStyler}: Artistic Video Stylization via Transformation Fields Transports},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10229--10238}
}
7DGS: Unified Spatial-Temporal-Angular Gaussian Splatting: Zhongpai Gao,

Benjamin Planche,

Meng Zheng,

Anwesa Choudhuri,

Terrence Chen,

Ziyan Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhongpai and Planche, Benjamin and Zheng, Meng and Choudhuri, Anwesa and Chen, Terrence and Wu, Ziyan},
  title     = {{7DGS}: Unified Spatial-Temporal-Angular {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26316--26325}
}
Robust and Efficient 3D Gaussian Splatting for Urban Scene Reconstruction: Zhensheng Yuan,

Haozhi Huang,

Zhen Xiong,

Di Wang,

Guanghua Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhensheng and Huang, Haozhi and Xiong, Zhen and Wang, Di and Yang, Guanghua},
  title     = {Robust and Efficient {3D} {Gaussian} Splatting for Urban Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26209--26219}
}
DWIM: Towards Tool-aware Visual Reasoning via Discrepancy-aware Workflow Generation & Instruct-Masking Tuning: Fucai Ke,

Vijay Kumar B G,

Xingjian Leng,

Zhixi Cai,

Zaid Khan,

Weiqing Wang,

Pari Delir Haghighi,

Hamid Rezatofighi,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Fucai and G, Vijay Kumar B and Leng, Xingjian and Cai, Zhixi and Khan, Zaid and Wang, Weiqing and Haghighi, Pari Delir and Rezatofighi, Hamid and Chandraker, Manmohan},
  title     = {{DWIM}: Towards Tool-aware Visual Reasoning via Discrepancy-aware Workflow Generation \& Instruct-Masking Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3378--3389}
}
Iris: Breaking GUI Complexity with Adaptive Focus and Self-Refining: Zhiqi Ge,

Juncheng Li,

Xinglei Pang,

Minghe Gao,

Kaihang Pan,

Wang Lin,

Hao Fei,

Wenqiao Zhang,

Siliang Tang,

Yueting Zhuang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Zhiqi and Li, Juncheng and Pang, Xinglei and Gao, Minghe and Pan, Kaihang and Lin, Wang and Fei, Hao and Zhang, Wenqiao and Tang, Siliang and Zhuang, Yueting},
  title     = {Iris: Breaking {GUI} Complexity with Adaptive Focus and Self-Refining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24559--24568}
}
Rethink Sparse Signals for Pose-guided Text-to-image Generation: Wenjie Xuan,

Jing Zhang,

Juhua Liu,

Bo Du,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xuan_2025_ICCV,
  author    = {Xuan, Wenjie and Zhang, Jing and Liu, Juhua and Du, Bo and Tao, Dacheng},
  title     = {Rethink Sparse Signals for Pose-guided Text-to-image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15896--15906}
}
RoboTrom-Nav: A Unified Framework for Embodied Navigation Integrating Perception, Planning, and Prediction: Yufeng Zhong,

Chengjian Feng,

Feng Yan,

Fanfan Liu,

Liming Zheng,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yufeng and Feng, Chengjian and Yan, Feng and Liu, Fanfan and Zheng, Liming and Ma, Lin},
  title     = {{RoboTrom-Nav}: A Unified Framework for Embodied Navigation Integrating Perception, Planning, and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6416--6425}
}
PatchScaler: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution: Yong Liu,

Hang Dong,

Jinshan Pan,

Qingji Dong,

Kai Chen,

Rongxiang Zhang,

Lean Fu,

Fei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yong and Dong, Hang and Pan, Jinshan and Dong, Qingji and Chen, Kai and Zhang, Rongxiang and Fu, Lean and Wang, Fei},
  title     = {{PatchScaler}: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11283--11293}
}
RefEdit: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions: Bimsara Pathiraja,

Maitreya Patel,

Shivam Singh,

Yezhou Yang,

Chitta Baral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathiraja_2025_ICCV,
  author    = {Pathiraja, Bimsara and Patel, Maitreya and Singh, Shivam and Yang, Yezhou and Baral, Chitta},
  title     = {{RefEdit}: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15646--15656}
}
ETA: Energy-based Test-time Adaptation for Depth Completion: Younjoon Chung,

Hyoungseob Park,

Patrick Rim,

Xiaoran Zhang,

Jihe He,

Ziyao Zeng,

Safa Cicek,

Byung-Woo Hong,

James S. Duncan,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2025_ICCV,
  author    = {Chung, Younjoon and Park, Hyoungseob and Rim, Patrick and Zhang, Xiaoran and He, Jihe and Zeng, Ziyao and Cicek, Safa and Hong, Byung-Woo and Duncan, James S. and Wong, Alex},
  title     = {{ETA}: Energy-based Test-time Adaptation for Depth Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6001--6012}
}
Amodal3R: Amodal 3D Reconstruction from Occluded 2D Images: Tianhao Wu,

Chuanxia Zheng,

Frank Guan,

Andrea Vedaldi,

Tat-Jen Cham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Tianhao and Zheng, Chuanxia and Guan, Frank and Vedaldi, Andrea and Cham, Tat-Jen},
  title     = {{Amodal3R}: Amodal {3D} Reconstruction from Occluded {2D} Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9181--9193}
}
A Unified Framework for Motion Reasoning and Generation in Human Interaction: Jeongeun Park,

Sungjoon Choi,

Sangdoo Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jeongeun and Choi, Sungjoon and Yun, Sangdoo},
  title     = {A Unified Framework for Motion Reasoning and Generation in Human Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10698--10707}
}
Dynamic Group Detection using VLM-augmented Temporal Groupness Graph: Kaname Yokoyama,

Chihiro Nakatani,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yokoyama_2025_ICCV,
  author    = {Yokoyama, Kaname and Nakatani, Chihiro and Ukita, Norimichi},
  title     = {Dynamic Group Detection using {VLM}-augmented Temporal Groupness Graph},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10475--10484}
}
SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications: Yana Hasson,

Pauline Luc,

Liliane Momeni,

Maks Ovsjanikov,

Guillaume Le Moing,

Alina Kuznetsova,

Ira Ktena,

Jennifer J. Sun,

Skanda Koppula,

Dilara Gokay,

Joseph Heyward,

Etienne Pot,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasson_2025_ICCV,
  author    = {Hasson, Yana and Luc, Pauline and Momeni, Liliane and Ovsjanikov, Maks and Le Moing, Guillaume and Kuznetsova, Alina and Ktena, Ira and Sun, Jennifer J. and Koppula, Skanda and Gokay, Dilara and Heyward, Joseph and Pot, Etienne and Zisserman, Andrew},
  title     = {{SciVid}: Cross-Domain Evaluation of Video Models in Scientific Applications},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21800--21811}
}
RoboPearls: Editable Video Simulation for Robot Manipulation: Tang Tao,

Likui Zhang,

Youpeng Wen,

Kaidong Zhang,

Jia-Wang Bian,

Xia Zhou,

Tianyi Yan,

Kun Zhan,

Peng Jia,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Tang and Zhang, Likui and Wen, Youpeng and Zhang, Kaidong and Bian, Jia-Wang and Zhou, Xia and Yan, Tianyi and Zhan, Kun and Jia, Peng and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoboPearls}: Editable Video Simulation for Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10118--10129}
}
FreeMorph: Tuning-Free Generalized Image Morphing with Diffusion Model: Yukang Cao,

Chenyang Si,

Jinghao Wang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Yukang and Si, Chenyang and Wang, Jinghao and Liu, Ziwei},
  title     = {{FreeMorph}: Tuning-Free Generalized Image Morphing with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18111--18120}
}
ConstStyle: Robust Domain Generalization with Unified Style Transformation: Nam Duong Tran,

Nam Nguyen Phuong,

Hieu H. Pham,

Phi Le Nguyen,

My T. Thai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Nam Duong and Phuong, Nam Nguyen and Pham, Hieu H. and Le Nguyen, Phi and Thai, My T.},
  title     = {{ConstStyle}: Robust Domain Generalization with Unified Style Transformation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3174--3183}
}
Vision-Language Models Can't See the Obvious: Ngoc Dung Huynh,

Phuc H Le-Khac,

Wamiq Reyaz Para,

Ankit Singh,

Sanath Narayan; [pdf]
[bibtex]
@InProceedings{Huynh_2025_ICCV,
  author    = {Huynh, Ngoc Dung and Le-Khac, Phuc H and Para, Wamiq Reyaz and Singh, Ankit and Narayan, Sanath},
  title     = {Vision-Language Models Can't See the Obvious},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24159--24169}
}
Perspective-Aware Teaching: Adapting Knowledge for Heterogeneous Distillation: Jhe-Hao Lin,

Yi Yao,

Chan-Feng Hsu,

Hong-Xia Xie,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Jhe-Hao and Yao, Yi and Hsu, Chan-Feng and Xie, Hong-Xia and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {Perspective-Aware Teaching: Adapting Knowledge for Heterogeneous Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4178--4187},
}
GazeGaussian: High-Fidelity Gaze Redirection with 3D Gaussian Splatting: Xiaobao Wei,

Peng Chen,

Guangyu Li,

Ming Lu,

Hui Chen,

Feng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Chen, Peng and Li, Guangyu and Lu, Ming and Chen, Hui and Tian, Feng},
  title     = {{GazeGaussian}: High-Fidelity Gaze Redirection with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13293--13303},
}
Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion: Xingyu Hu,

Junjun Jiang,

Chenyang Wang,

Kui Jiang,

Xianming Liu,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xingyu and Jiang, Junjun and Wang, Chenyang and Jiang, Kui and Liu, Xianming and Ma, Jiayi},
  title     = {Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11262--11272},
}
Stroke2Sketch: Harnessing Stroke Attributes for Training-Free Sketch Generation: Rui Yang,

Huining Li,

Yiyi Long,

Xiaojun Wu,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Rui and Li, Huining and Long, Yiyi and Wu, Xiaojun and He, Shengfeng},
  title     = {{Stroke2Sketch}: Harnessing Stroke Attributes for Training-Free Sketch Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16545--16554},
}
Mitigating Catastrophic Overfitting in Fast Adversarial Training via Label Information Elimination: Chao Pan,

Ke Tang,

Qing Li,

Xin Yao; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Chao and Tang, Ke and Li, Qing and Yao, Xin},
  title     = {Mitigating Catastrophic Overfitting in Fast Adversarial Training via Label Information Elimination},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2991--3000},
}
Pi-GPS: Enhancing Geometry Problem Solving by Unleashing the Power of Diagrammatic Information: Junbo Zhao,

Ting Zhang,

Jiayu Sun,

Mi Tian,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Junbo and Zhang, Ting and Sun, Jiayu and Tian, Mi and Huang, Hua},
  title     = {{Pi-GPS}: Enhancing Geometry Problem Solving by Unleashing the Power of Diagrammatic Information},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1526--1536},
}
CountSE: Soft Exemplar Open-set Object Counting: Shuai Liu,

Peng Zhang,

Shiwei Zhang,

Wei Ke; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shuai and Zhang, Peng and Zhang, Shiwei and Ke, Wei},
  title     = {{CountSE}: Soft Exemplar Open-set Object Counting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21536--21546},
}
Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation: Gang Dai,

Yifan Zhang,

Yutao Qin,

Qiangya Guo,

Shuangping Huang,

Shuicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Gang and Zhang, Yifan and Qin, Yutao and Guo, Qiangya and Huang, Shuangping and Yan, Shuicheng},
  title     = {Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19054--19064},
}
Wasserstein Style Distribution Analysis and Transform for Stylized Image Generation: Xi Yu,

Xiang Gu,

Zhihao Shi,

Jian Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Xi and Gu, Xiang and Shi, Zhihao and Sun, Jian},
  title     = {{Wasserstein} Style Distribution Analysis and Transform for Stylized Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17496--17505},
}
PRE-Mamba: A 4D State Space Model for Ultra-High-Frequent Event Camera Deraining: Ciyu Ruan,

Ruishan Guo,

Zihang Gong,

Jingao Xu,

Wenhan Yang,

Xinlei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2025_ICCV,
  author    = {Ruan, Ciyu and Guo, Ruishan and Gong, Zihang and Xu, Jingao and Yang, Wenhan and Chen, Xinlei},
  title     = {{PRE-Mamba}: A {4D} State Space Model for Ultra-High-Frequent Event Camera Deraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9169--9180},
}
Future-Aware Interaction Network For Motion Forecasting: Shijie Li,

Chunyu Liu,

Xun Xu,

Si Yong Yeo,

Xulei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shijie and Liu, Chunyu and Xu, Xun and Yeo, Si Yong and Yang, Xulei},
  title     = {Future-Aware Interaction Network For Motion Forecasting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7505--7515},
}
SuMa: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models: Kien Nguyen,

Anh Tran,

Cuong Pham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Kien and Tran, Anh and Pham, Cuong},
  title     = {{SuMa}: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19587--19596},
}
Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition: Haochen Chang,

Pengfei Ren,

Haoyang Zhang,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Haochen and Ren, Pengfei and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11252--11261},
}
Stochastic Interpolants for Revealing Stylistic Flows across the History of Art: Pingchuan Ma,

Ming Gui,

Johannes Schusterbauer,

Xiaopei Yang,

Olga Grebenkova,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Pingchuan and Gui, Ming and Schusterbauer, Johannes and Yang, Xiaopei and Grebenkova, Olga and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {Stochastic Interpolants for Revealing Stylistic Flows across the History of Art},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5867--5878},
}
MIORe & VAR-MIORe: Benchmarks to Push the Boundaries of Restoration: George Ciubotariu,

Zhuyun Zhou,

Zongwei Wu,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Ciubotariu_2025_ICCV,
  author    = {Ciubotariu, George and Zhou, Zhuyun and Wu, Zongwei and Timofte, Radu},
  title     = {{MIORe} \& {VAR-MIORe}: Benchmarks to Push the Boundaries of Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19784--19793},
}
Depth AnyEvent: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation: Luca Bartolomei,

Enrico Mannocci,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia; [pdf] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2025_ICCV,
  author    = {Bartolomei, Luca and Mannocci, Enrico and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {Depth {AnyEvent}: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19669--19678},
}
Thermal Polarimetric Multi-view Stereo: Takahiro Kushida,

Kenichiro Tanaka; [pdf]
[bibtex]
@InProceedings{Kushida_2025_ICCV,
  author    = {Kushida, Takahiro and Tanaka, Kenichiro},
  title     = {Thermal Polarimetric Multi-view Stereo},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27390--27399},
}
MeshMamba: State Space Models for Articulated 3D Mesh Generation and Reconstruction: Yusuke Yoshiyasu,

Leyuan Sun,

Ryusuke Sagawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoshiyasu_2025_ICCV,
  author    = {Yoshiyasu, Yusuke and Sun, Leyuan and Sagawa, Ryusuke},
  title     = {{MeshMamba}: State Space Models for Articulated {3D} Mesh Generation and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6563--6574},
}
MH-LVC: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding: Huu-Tai Phung,

Zong-Lin Gao,

Yi-Chen Yao,

Kuan-Wei Ho,

Yi-Hsin Chen,

Yu-Hsiang Lin,

Alessandro Gnutti,

Wen-Hsiao Peng; [pdf] [supp]
[bibtex]
@InProceedings{Phung_2025_ICCV,
  author    = {Phung, Huu-Tai and Gao, Zong-Lin and Yao, Yi-Chen and Ho, Kuan-Wei and Chen, Yi-Hsin and Lin, Yu-Hsiang and Gnutti, Alessandro and Peng, Wen-Hsiao},
  title     = {{MH-LVC}: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19649--19658},
}
Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution: Vlad Hosu,

Lorenzo Agnolucci,

Daisuke Iso,

Dietmar Saupe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hosu_2025_ICCV,
  author    = {Hosu, Vlad and Agnolucci, Lorenzo and Iso, Daisuke and Saupe, Dietmar},
  title     = {Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12863--12872},
}
Temporal Rate Reduction Clustering for Human Motion Segmentation: Xianghan Meng,

Zhengyu Tong,

Zhiyuan Huang,

Chun-Guang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Xianghan and Tong, Zhengyu and Huang, Zhiyuan and Li, Chun-Guang},
  title     = {Temporal Rate Reduction Clustering for Human Motion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14644--14654},
}
SparseVILA: Decoupling Visual Sparsity for Efficient VLM Inference: Samir Khaki,

Junxian Guo,

Jiaming Tang,

Shang Yang,

Yukang Chen,

Konstantinos N. Plataniotis,

Yao Lu,

Song Han,

Zhijian Liu; [pdf] [supp]
[bibtex]
@InProceedings{Khaki_2025_ICCV,
  author    = {Khaki, Samir and Guo, Junxian and Tang, Jiaming and Yang, Shang and Chen, Yukang and Plataniotis, Konstantinos N. and Lu, Yao and Han, Song and Liu, Zhijian},
  title     = {{SparseVILA}: Decoupling Visual Sparsity for Efficient {VLM} Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23784--23794},
}
Trust but Verify: Programmatic VLM Evaluation in the Wild: Viraj Prabhu,

Senthil Purushwalkam,

An Yan,

Caiming Xiong,

Ran Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Prabhu_2025_ICCV,
  author    = {Prabhu, Viraj and Purushwalkam, Senthil and Yan, An and Xiong, Caiming and Xu, Ran},
  title     = {Trust but Verify: Programmatic {VLM} Evaluation in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3258--3267},
}
Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes: Mengkun She,

Felix Seegräber,

David Nakath,

Patricia Schöntag,

Kevin Köser; [pdf] [supp]
[bibtex]
@InProceedings{She_2025_ICCV,
  author    = {She, Mengkun and Seegr{\"a}ber, Felix and Nakath, David and Sch{\"o}ntag, Patricia and K{\"o}ser, Kevin},
  title     = {Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29110--29119},
}
Embodied Representation Alignment with Mirror Neurons: Wentao Zhu,

Zhining Zhang,

Yuwei Ren,

Yin Huang,

Hao Xu,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Wentao and Zhang, Zhining and Ren, Yuwei and Huang, Yin and Xu, Hao and Wang, Yizhou},
  title     = {Embodied Representation Alignment with Mirror Neurons},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11948--11957},
}
Wave-MambaAD: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection: Qiao Zhang,

Mingwen Shao,

Xinyuan Chen,

Xiang Lv,

Kai Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qiao and Shao, Mingwen and Chen, Xinyuan and Lv, Xiang and Xu, Kai},
  title     = {{Wave-MambaAD}: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20868--20877},
}
MonoSOWA: Scalable Monocular 3D Object Detector Without Human Annotations: Jan Skvrna,

Lukas Neumann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Skvrna_2025_ICCV,
  author    = {Skvrna, Jan and Neumann, Lukas},
  title     = {{MonoSOWA}: Scalable Monocular {3D} Object Detector Without Human Annotations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7613--7623},
}
Measuring the Impact of Rotation Equivariance on Aerial Object Detection: Xiuyu Wu,

Xinhao Wang,

Xiubin Zhu,

Lan Yang,

Jiyuan Liu,

Xingchen Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Xiuyu and Wang, Xinhao and Zhu, Xiubin and Yang, Lan and Liu, Jiyuan and Hu, Xingchen},
  title     = {Measuring the Impact of Rotation Equivariance on Aerial Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7329--7339},
}
Inter2Former: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation: You Huang,

Lichao Chen,

Jiayi Ji,

Liujuan Cao,

Shengchuan Zhang,

Rongrong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, You and Chen, Lichao and Ji, Jiayi and Cao, Liujuan and Zhang, Shengchuan and Ji, Rongrong},
  title     = {{Inter2Former}: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19816--19826},
}
LEGION: Learning to Ground and Explain for Synthetic Image Detection: Hengrui Kang,

Siwei Wen,

Zichen Wen,

Junyan Ye,

Weijia Li,

Peilin Feng,

Baichuan Zhou,

Bin Wang,

Dahua Lin,

Linfeng Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Hengrui and Wen, Siwei and Wen, Zichen and Ye, Junyan and Li, Weijia and Feng, Peilin and Zhou, Baichuan and Wang, Bin and Lin, Dahua and Zhang, Linfeng and He, Conghui},
  title     = {{LEGION}: Learning to Ground and Explain for Synthetic Image Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18937--18947},
}
KDA: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding: Ran Ran,

Jiwei Wei,

Shiyuan He,

Zeyu Ma,

Chaoning Zhang,

Ning Xie,

Yang Yang; [pdf]
[bibtex]
@InProceedings{Ran_2025_ICCV,
  author    = {Ran, Ran and Wei, Jiwei and He, Shiyuan and Ma, Zeyu and Zhang, Chaoning and Xie, Ning and Yang, Yang},
  title     = {{KDA}: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23311--23320},
}
NormalCrafter: Learning Temporally Consistent Normals from Video Diffusion Priors: Yanrui Bin,

Wenbo Hu,

Haoyuan Wang,

Xinya Chen,

Bing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bin_2025_ICCV,
  author    = {Bin, Yanrui and Hu, Wenbo and Wang, Haoyuan and Chen, Xinya and Wang, Bing},
  title     = {{NormalCrafter}: Learning Temporally Consistent Normals from Video Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8330--8339},
}
TrackAny3D: Transferring Pretrained 3D Models for Category-unified 3D Point Cloud Tracking: Mengmeng Wang,

Haonan Wang,

Yulong Li,

Xiangjie Kong,

Jiaxin Du,

Guojiang Shen,

Feng Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengmeng and Wang, Haonan and Li, Yulong and Kong, Xiangjie and Du, Jiaxin and Shen, Guojiang and Xia, Feng},
  title     = {{TrackAny3D}: Transferring Pretrained {3D} Models for Category-unified {3D} Point Cloud Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28249--28259},
}
Color Matching Using Hypernetwork-Based Kolmogorov-Arnold Networks: Artem Nikonorov,

Georgy Perevozchikov,

Andrei Korepanov,

Nancy Mehta,

Mahmoud Afifi,

Egor Ershov,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nikonorov_2025_ICCV,
  author    = {Nikonorov, Artem and Perevozchikov, Georgy and Korepanov, Andrei and Mehta, Nancy and Afifi, Mahmoud and Ershov, Egor and Timofte, Radu},
  title     = {Color Matching Using Hypernetwork-Based {Kolmogorov-Arnold} Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7099--7109},
}
S2M2: Scalable Stereo Matching Model for Reliable Depth Estimation: Junhong Min,

Youngpil Jeon,

Jimin Kim,

Minyong Choi; [pdf] [supp]
[bibtex]
@InProceedings{Min_2025_ICCV,
  author    = {Min, Junhong and Jeon, Youngpil and Kim, Jimin and Choi, Minyong},
  title     = {{S2M2}: Scalable Stereo Matching Model for Reliable Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26729--26739},
}
NETracer: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction: Chao Liu,

Yangbo Jiang,

Nenggan Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chao and Jiang, Yangbo and Zheng, Nenggan},
  title     = {{NETracer}: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20593--20602},
}
MGSfM: Multi-Camera Geometry Driven Global Structure-from-Motion: Peilin Tao,

Hainan Cui,

Diantao Tu,

Shuhan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Peilin and Cui, Hainan and Tu, Diantao and Shen, Shuhan},
  title     = {{MGSfM}: Multi-Camera Geometry Driven Global Structure-from-Motion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5232--5241},
}
FaceLift: Learning Generalizable Single Image 3D Face Reconstruction from Synthetic Heads: Weijie Lyu,

Yi Zhou,

Ming-Hsuan Yang,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Weijie and Zhou, Yi and Yang, Ming-Hsuan and Shu, Zhixin},
  title     = {{FaceLift}: Learning Generalizable Single Image {3D} Face Reconstruction from Synthetic Heads},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12691--12701},
}
Towards Open-World Generation of Stereo Images and Unsupervised Matching: Feng Qiao,

Zhexiao Xiong,

Eric Xing,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiao_2025_ICCV,
  author    = {Qiao, Feng and Xiong, Zhexiao and Xing, Eric and Jacobs, Nathan},
  title     = {Towards Open-World Generation of Stereo Images and Unsupervised Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26579--26589},
}
PolarAnything: Diffusion-based Polarimetric Image Synthesis: Kailong Zhang,

Youwei Lyu,

Heng Guo,

Si Li,

Zhanyu Ma,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kailong and Lyu, Youwei and Guo, Heng and Li, Si and Ma, Zhanyu and Shi, Boxin},
  title     = {{PolarAnything}: Diffusion-based Polarimetric Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26466--26476},
}
What's Making That Sound Right Now? Video-centric Audio-Visual Localization: Hahyeon Choi,

Junhoo Lee,

Nojun Kwak; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Hahyeon and Lee, Junhoo and Kwak, Nojun},
  title     = {What's Making That Sound Right Now? Video-centric Audio-Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20095--20104},
}
WonderPlay: Dynamic 3D Scene Generation from a Single Image and Actions: Zizhang Li,

Hong-Xing Yu,

Wei Liu,

Yin Yang,

Charles Herrmann,

Gordon Wetzstein,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zizhang and Yu, Hong-Xing and Liu, Wei and Yang, Yin and Herrmann, Charles and Wetzstein, Gordon and Wu, Jiajun},
  title     = {{WonderPlay}: Dynamic {3D} Scene Generation from a Single Image and Actions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9080--9090},
}
SRefiner: Soft-Braid Attention for Multi-Agent Trajectory Refinement: Liwen Xiao,

Zhiyu Pan,

Zhicheng Wang,

Zhiguo Cao,

Wei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Liwen and Pan, Zhiyu and Wang, Zhicheng and Cao, Zhiguo and Li, Wei},
  title     = {{SRefiner}: Soft-Braid Attention for Multi-Agent Trajectory Refinement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {960--969},
}
Scheduling Weight Transitions for Quantization-Aware Training: Junghyup Lee,

Jeimin Jeon,

Dohyung Kim,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Junghyup and Jeon, Jeimin and Kim, Dohyung and Ham, Bumsub},
  title     = {Scheduling Weight Transitions for Quantization-Aware Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23466--23475},
}
Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression: Haowei Kuang,

Wenhan Yang,

Zongming Guo,

Jiaying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Kuang_2025_ICCV,
  author    = {Kuang, Haowei and Yang, Wenhan and Guo, Zongming and Liu, Jiaying},
  title     = {Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16514--16523},
}
Efficient Spiking Point Mamba for Point Cloud Analysis: Peixi Wu,

Bosong Chai,

Menghua Zheng,

Wei Li,

Zhangchi Hu,

Jie Chen,

Zheyu Zhang,

Hebei Li,

Xiaoyan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Peixi and Chai, Bosong and Zheng, Menghua and Li, Wei and Hu, Zhangchi and Chen, Jie and Zhang, Zheyu and Li, Hebei and Sun, Xiaoyan},
  title     = {Efficient Spiking Point {Mamba} for Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26393--26403},
}
VideoVAE+: Large Motion Video Autoencoding with Cross-modal Video VAE: Yazhou Xing,

Yang Fei,

Yingqing He,

Jingye Chen,

Jiaxin Xie,

Xiaowei Chi,

Qifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_ICCV,
  author    = {Xing, Yazhou and Fei, Yang and He, Yingqing and Chen, Jingye and Xie, Jiaxin and Chi, Xiaowei and Chen, Qifeng},
  title     = {{VideoVAE+}: Large Motion Video Autoencoding with Cross-modal Video {VAE}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17951--17960},
}
Learning 3D Object Spatial Relationships from Pre-trained 2D Diffusion Models: Sangwon Baik,

Hyeonwoo Kim,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baik_2025_ICCV,
  author    = {Baik, Sangwon and Kim, Hyeonwoo and Joo, Hanbyul},
  title     = {Learning {3D} Object Spatial Relationships from Pre-trained {2D} Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8418--8428},
}
Event-guided Unified Framework for Low-light Video Enhancement, Frame Interpolation, and Deblurring: Taewoo Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Taewoo and Yoon, Kuk-Jin},
  title     = {Event-guided Unified Framework for Low-light Video Enhancement, Frame Interpolation, and Deblurring},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8524--8534},
}
FE-CLIP: Frequency Enhanced CLIP Model for Zero-Shot Anomaly Detection and Segmentation: Tao Gong,

Qi Chu,

Bin Liu,

Wei Zhou,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Tao and Chu, Qi and Liu, Bin and Zhou, Wei and Yu, Nenghai},
  title     = {{FE-CLIP}: Frequency Enhanced {CLIP} Model for Zero-Shot Anomaly Detection and Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21220--21230},
}
MotionDiff: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion: Yikun Ma,

Yiqing Li,

Jiawei Wu,

Xing Luo,

Zhi Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Yikun and Li, Yiqing and Wu, Jiawei and Luo, Xing and Jin, Zhi},
  title     = {{MotionDiff}: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14475--14485},
}
Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation: Shuchang Ye,

Usman Naseem,

Mingyuan Meng,

Jinman Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Shuchang and Naseem, Usman and Meng, Mingyuan and Kim, Jinman},
  title     = {Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22316--22326},
}
CoTMR: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval: Zelong Sun,

Dong Jing,

Zhiwu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zelong and Jing, Dong and Lu, Zhiwu},
  title     = {{CoTMR}: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22675--22684},
}
PASTA: Part-Aware Sketch-to-3D Shape Generation with Text-Aligned Prior: Seunggwan Lee,

Hwanhee Jung,

Byoungsoo Koh,

Qixing Huang,

Sang Ho Yoon,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunggwan and Jung, Hwanhee and Koh, Byoungsoo and Huang, Qixing and Yoon, Sang Ho and Kim, Sangpil},
  title     = {{PASTA}: Part-Aware {Sketch-to-3D} Shape Generation with Text-Aligned Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18585--18595},
}
BASIC: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models: Jianting Tang,

Yubo Wang,

Haoyu Cao,

Linli Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jianting and Wang, Yubo and Cao, Haoyu and Xu, Linli},
  title     = {{BASIC}: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20582--20592},
}
Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation: Xiuyu Yang,

Shuhan Tan,

Philipp Krähenbühl; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xiuyu and Tan, Shuhan and Kr{\"a}henb{\"u}hl, Philipp},
  title     = {Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25305--25314},
}
VMBench: A Benchmark for Perception-Aligned Video Motion Generation: Xinran Ling,

Chen Zhu,

Meiqi Wu,

Hangyu Li,

Xiaokun Feng,

Cundian Yang,

Aiming Hao,

Jiashu Zhu,

Jiahong Wu,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Xinran and Zhu, Chen and Wu, Meiqi and Li, Hangyu and Feng, Xiaokun and Yang, Cundian and Hao, Aiming and Zhu, Jiashu and Wu, Jiahong and Chu, Xiangxiang},
  title     = {{VMBench}: A Benchmark for Perception-Aligned Video Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13087--13098},
}
Physics Context Builders: A Modular Framework for Physical Reasoning in Vision-Language Models: Vahid Balazadeh,

Mohammadmehdi Ataei,

Hyunmin Cheong,

Amir Hosein Khasahmadi,

Rahul G. Krishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Balazadeh_2025_ICCV,
  author    = {Balazadeh, Vahid and Ataei, Mohammadmehdi and Cheong, Hyunmin and Khasahmadi, Amir Hosein and Krishnan, Rahul G.},
  title     = {Physics Context Builders: A Modular Framework for Physical Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7318--7328},
}
RhythmGuassian: Repurposing Generalizable Gaussian Model For Remote Physiological Measurement: Hao Lu,

Yuting Zhang,

Jiaqi Tang,

Bowen Fu,

Wenhang Ge,

Wei Wei,

Kaishun Wu,

Yingcong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Hao and Zhang, Yuting and Tang, Jiaqi and Fu, Bowen and Ge, Wenhang and Wei, Wei and Wu, Kaishun and Chen, Yingcong},
  title     = {{RhythmGuassian}: Repurposing Generalizable {Gaussian} Model For Remote Physiological Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20780--20790},
}
Bridging the Sky and Ground: Towards View-Invariant Feature Learning for Aerial-Ground Person Re-Identification: Wajahat Khalid,

Bin Liu,

Xulin Li,

Muhammad Waqas,

Muhammad Sher Afgan; [pdf] [supp]
[bibtex]
@InProceedings{Khalid_2025_ICCV,
  author    = {Khalid, Wajahat and Liu, Bin and Li, Xulin and Waqas, Muhammad and Afgan, Muhammad Sher},
  title     = {Bridging the Sky and Ground: Towards View-Invariant Feature Learning for Aerial-Ground Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9749--9758},
}
Prototype Guided Backdoor Defense via Activation Space Manipulation: Venkat Adithya Amula,

Sunayana Samavedam,

Saurabh Saini,

Avani Gupta,

P J Narayanan; [pdf] [supp]
[bibtex]
@InProceedings{Amula_2025_ICCV,
  author    = {Amula, Venkat Adithya and Samavedam, Sunayana and Saini, Saurabh and Gupta, Avani and Narayanan, P J},
  title     = {Prototype Guided Backdoor Defense via Activation Space Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2195--2205},
}
EvolvingGrasp: Evolutionary Grasp Generation via Efficient Preference Alignment: Yufei Zhu,

Yiming Zhong,

Zemin Yang,

Peishan Cong,

Jingyi Yu,

Xinge Zhu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yufei and Zhong, Yiming and Yang, Zemin and Cong, Peishan and Yu, Jingyi and Zhu, Xinge and Ma, Yuexin},
  title     = {{EvolvingGrasp}: Evolutionary Grasp Generation via Efficient Preference Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11665--11674},
}
HyTIP: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding: Yi-Hsin Chen,

Yi-Chen Yao,

Kuan-Wei Ho,

Chun-Hung Wu,

Huu-Tai Phung,

Martin Benjak,

Jörn Ostermann,

Wen-Hsiao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yi-Hsin and Yao, Yi-Chen and Ho, Kuan-Wei and Wu, Chun-Hung and Phung, Huu-Tai and Benjak, Martin and Ostermann, J{\"o}rn and Peng, Wen-Hsiao},
  title     = {{HyTIP}: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17889--17898},
}
Achieving More with Less: Additive Prompt Tuning for Rehearsal-Free Class-Incremental Learning: Haoran Chen,

Ping Wang,

Zihan Zhou,

Xu Zhang,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Haoran and Wang, Ping and Zhou, Zihan and Zhang, Xu and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {Achieving More with Less: Additive Prompt Tuning for Rehearsal-Free Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {340--349},
}
PromptDresser: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask: Jeongho Kim,

Hoiyeong Jin,

Sunghyun Park,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongho and Jin, Hoiyeong and Park, Sunghyun and Choo, Jaegul},
  title     = {{PromptDresser}: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16026--16036},
}
VLRMBench: A Comprehensive and Challenging Benchmark for Vision-Language Reward Models: Jiacheng Ruan,

Wenzhen Yuan,

Xian Gao,

Ye Guo,

Daoxin Zhang,

Zhe Xu,

Yao Hu,

Ting Liu,

Yuzhuo Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2025_ICCV,
  author    = {Ruan, Jiacheng and Yuan, Wenzhen and Gao, Xian and Guo, Ye and Zhang, Daoxin and Xu, Zhe and Hu, Yao and Liu, Ting and Fu, Yuzhuo},
  title     = {{VLRMBench}: A Comprehensive and Challenging Benchmark for Vision-Language Reward Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3163--3173},
}
SITE: towards Spatial Intelligence Thorough Evaluation: Wenqi Wang,

Reuben Tan,

Pengyue Zhu,

Jianwei Yang,

Zhengyuan Yang,

Lijuan Wang,

Andrey Kolobov,

Jianfeng Gao,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenqi and Tan, Reuben and Zhu, Pengyue and Yang, Jianwei and Yang, Zhengyuan and Wang, Lijuan and Kolobov, Andrey and Gao, Jianfeng and Gong, Boqing},
  title     = {{SITE}: towards Spatial Intelligence Thorough Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9058--9069},
}
DASH: Detection and Assessment of Systematic Hallucinations of VLMs: Maximilian Augustin,

Yannic Neuhaus,

Matthias Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Augustin_2025_ICCV,
  author    = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias},
  title     = {{DASH}: Detection and Assessment of Systematic Hallucinations of {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22748--22759},
}
FW-Merging: Scaling Model Merging with Frank-Wolfe Optimization: Hao Mark Chen,

Shell Xu Hu,

Wayne Luk,

Timothy Hospedales,

Hongxiang Fan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Hao Mark and Hu, Shell Xu and Luk, Wayne and Hospedales, Timothy and Fan, Hongxiang},
  title     = {{FW-Merging}: Scaling Model Merging with {Frank-Wolfe} Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3390--3400},
}
On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering: Bo Peng,

Jie Lu,

Guangquan Zhang,

Zhen Fang; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Bo and Lu, Jie and Zhang, Guangquan and Fang, Zhen},
  title     = {On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19805--19815},
}
ZipVL: Accelerating Vision-Language Models through Dynamic Token Sparsity: Yefei He,

Feng Chen,

Jing Liu,

Wenqi Shao,

Hong Zhou,

Kaipeng Zhang,

Bohan Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Yefei and Chen, Feng and Liu, Jing and Shao, Wenqi and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan},
  title     = {{ZipVL}: Accelerating Vision-Language Models through Dynamic Token Sparsity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20477--20486},
}
RayletDF: Raylet Distance Fields for Generalizable 3D Surface Reconstruction from Point Clouds or Gaussians: Shenxing Wei,

Jinxi Li,

Yafei Yang,

Siyuan Zhou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shenxing and Li, Jinxi and Yang, Yafei and Zhou, Siyuan and Yang, Bo},
  title     = {{RayletDF}: Raylet Distance Fields for Generalizable {3D} Surface Reconstruction from Point Clouds or {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25616--25626},
}
VOVTrack: Exploring the Potentiality in Raw Videos for Open-Vocabulary Multi-Object Tracking: Zekun Qian,

Ruize Han,

Junhui Hou,

Linqi Song,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zekun and Han, Ruize and Hou, Junhui and Song, Linqi and Feng, Wei},
  title     = {{VOVTrack}: Exploring the Potentiality in Raw Videos for Open-Vocabulary Multi-Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7472--7482},
}
Planar Affine Rectification from Local Change of Scale and Orientation: Yuval Nissan,

Marc Pollefeys,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Nissan_2025_ICCV,
  author    = {Nissan, Yuval and Pollefeys, Marc and Barath, Daniel},
  title     = {Planar Affine Rectification from Local Change of Scale and Orientation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27147--27155},
}
Grouped Speculative Decoding for Autoregressive Image Generation: Junhyuk So,

Juncheol Shin,

Hyunho Kook,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{So_2025_ICCV,
  author    = {So, Junhyuk and Shin, Juncheol and Kook, Hyunho and Park, Eunhyeok},
  title     = {Grouped Speculative Decoding for Autoregressive Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15375--15384},
}
You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception: Hao Si,

Ehsan Javanmardi,

Manabu Tsukada; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Si_2025_ICCV,
  author    = {Si, Hao and Javanmardi, Ehsan and Tsukada, Manabu},
  title     = {You Share Beliefs, {I} Adapt: Progressive Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27521--27530},
}
Music-Aligned Holistic 3D Dance Generation via Hierarchical Motion Modeling: Xiaojie Li,

Ronghui Li,

Shukai Fang,

Shuzhao Xie,

Xiaoyang Guo,

Jiaqing Zhou,

Junkun Peng,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaojie and Li, Ronghui and Fang, Shukai and Xie, Shuzhao and Guo, Xiaoyang and Zhou, Jiaqing and Peng, Junkun and Wang, Zhi},
  title     = {Music-Aligned Holistic {3D} Dance Generation via Hierarchical Motion Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14420--14430},
}
Advancing Visual Large Language Model for Multi-granular Versatile Perception: Wentao Xiang,

Haoxian Tan,

Yujie Zhong,

Cong Wei,

Dengjie Li,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Wentao and Tan, Haoxian and Zhong, Yujie and Wei, Cong and Li, Dengjie and Yang, Yujiu},
  title     = {Advancing Visual Large Language Model for Multi-granular Versatile Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22153--22164},
}
Neural Solver of Dichromatic Reflection Model for Specular Highlight Removal: Gang Fu; [pdf]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Gang},
  title     = {Neural Solver of Dichromatic Reflection Model for Specular Highlight Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7241--7250},
}
MatchDiffusion: Training-free Generation of Match-Cuts: Alejandro Pardo,

Fabio Pizzati,

Tong Zhang,

Alexander Pondaven,

Philip Torr,

Juan Camilo Perez,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pardo_2025_ICCV,
  author    = {Pardo, Alejandro and Pizzati, Fabio and Zhang, Tong and Pondaven, Alexander and Torr, Philip and Perez, Juan Camilo and Ghanem, Bernard},
  title     = {{MatchDiffusion}: Training-free Generation of Match-Cuts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14973--14982},
}
STD-GS: Exploring Frame-Event Interaction for SpatioTemporal-Disentangled Gaussian Splatting to Reconstruct High-Dynamic Scene: Hanyu Zhou,

Haonan Wang,

Haoyue Liu,

Yuxing Duan,

Luxin Yan,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Yan, Luxin and Lee, Gim Hee},
  title     = {{STD-GS}: Exploring Frame-Event Interaction for {SpatioTemporal-Disentangled} {Gaussian} Splatting to Reconstruct High-Dynamic Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24801--24810},
}
C2MIL: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis: Min Cen,

Zhenfeng Zhuang,

Yuzhe Zhang,

Min Zeng,

Baptiste Magnier,

Lequan Yu,

Hong Zhang,

Liansheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cen_2025_ICCV,
  author    = {Cen, Min and Zhuang, Zhenfeng and Zhang, Yuzhe and Zeng, Min and Magnier, Baptiste and Yu, Lequan and Zhang, Hong and Wang, Liansheng},
  title     = {{C2MIL}: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24392--24401},
}
Weakly-Supervised Learning of Dense Functional Correspondences: Stefan Stojanov,

Linan Zhao,

Yunzhi Zhang,

Daniel L. K. Yamins,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stojanov_2025_ICCV,
  author    = {Stojanov, Stefan and Zhao, Linan and Zhang, Yunzhi and Yamins, Daniel L. K. and Wu, Jiajun},
  title     = {Weakly-Supervised Learning of Dense Functional Correspondences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6981--6993},
}
Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction: Yanwen Fang,

Wenqi Jia,

Xu Cao,

Peng-Tao Jiang,

Guodong Li,

Jintai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Yanwen and Jia, Wenqi and Cao, Xu and Jiang, Peng-Tao and Li, Guodong and Chen, Jintai},
  title     = {Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13912--13921},
}
ReFlex: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation: Jimyeong Kim,

Jungwon Park,

Yeji Song,

Nojun Kwak,

Wonjong Rhee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jimyeong and Park, Jungwon and Song, Yeji and Kwak, Nojun and Rhee, Wonjong},
  title     = {{ReFlex}: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15939--15948},
}
Co-Painter: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection: Bowen Fu,

Wei Wei,

Jiaqi Tang,

Jiangtao Nie,

Yanyu Ye,

Xiaogang Xu,

Ying-Cong Chen,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Bowen and Wei, Wei and Tang, Jiaqi and Nie, Jiangtao and Ye, Yanyu and Xu, Xiaogang and Chen, Ying-Cong and Zhang, Lei},
  title     = {{Co-Painter}: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16830--16839},
}
Object-level Correlation for Few-Shot Segmentation: Chunlin Wen,

Yu Zhang,

Jie Fan,

Hongyuan Zhu,

Xiu-Shen Wei,

Yijun Wang,

Zhiqiang Kou,

Shuzhou Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Chunlin and Zhang, Yu and Fan, Jie and Zhu, Hongyuan and Wei, Xiu-Shen and Wang, Yijun and Kou, Zhiqiang and Sun, Shuzhou},
  title     = {Object-level Correlation for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23689--23699},
}
Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image: Shuang Xu,

Zixiang Zhao,

Haowen Bai,

Chang Yu,

Jiangjun Peng,

Xiangyong Cao,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Shuang and Zhao, Zixiang and Bai, Haowen and Yu, Chang and Peng, Jiangjun and Cao, Xiangyong and Meng, Deyu},
  title     = {Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12002--12011},
}
SAMO: A Lightweight Sharpness-Aware Approach for Multi-Task Optimization with Joint Global-Local Perturbation: Hao Ban,

Gokul Ram Subramani,

Kaiyi Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ban_2025_ICCV,
  author    = {Ban, Hao and Subramani, Gokul Ram and Ji, Kaiyi},
  title     = {{SAMO}: A Lightweight Sharpness-Aware Approach for Multi-Task Optimization with Joint Global-Local Perturbation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {785--795},
}
RoboTron-Mani: All-in-One Multimodal Large Model for Robotic Manipulation: Feng Yan,

Fanfan Liu,

Yiyang Huang,

Zechao Guan,

Liming Zheng,

Yufeng Zhong,

Chengjian Feng,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Feng and Liu, Fanfan and Huang, Yiyang and Guan, Zechao and Zheng, Liming and Zhong, Yufeng and Feng, Chengjian and Ma, Lin},
  title     = {{RoboTron-Mani}: All-in-One Multimodal Large Model for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13707--13718},
}
FastJSMA: Accelerating Jacobian-based Saliency Map Attacks through Gradient Decoupling: Zhenghao Gao,

Shengjie Xu,

Zijing Li,

Meixi Chen,

Chaojian Yu,

Yuanjie Shao,

Changxin Gao; [pdf]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhenghao and Xu, Shengjie and Li, Zijing and Chen, Meixi and Yu, Chaojian and Shao, Yuanjie and Gao, Changxin},
  title     = {{FastJSMA}: Accelerating {Jacobian}-based Saliency Map Attacks through Gradient Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1506--1515},
}
TAR3D: Creating High-Quality 3D Assets via Next-Part Prediction: Xuying Zhang,

Yutong Liu,

Yangguang Li,

Renrui Zhang,

Yufei Liu,

Kai Wang,

Wanli Ouyang,

Zhiwei Xiong,

Peng Gao,

Qibin Hou,

Ming-Ming Cheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xuying and Liu, Yutong and Li, Yangguang and Zhang, Renrui and Liu, Yufei and Wang, Kai and Ouyang, Wanli and Xiong, Zhiwei and Gao, Peng and Hou, Qibin and Cheng, Ming-Ming},
  title     = {{TAR3D}: Creating High-Quality {3D} Assets via Next-Part Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5134--5145},
}
CVFusion: Cross-View Fusion of 4D Radar and Camera for 3D Object Detection: Hanzhi Zhong,

Zhiyu Xiang,

Ruoyu Xu,

Jingyun Fu,

Peng Xu,

Shaohong Wang,

Zhihao Yang,

Tianyu Pu,

Eryun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Hanzhi and Xiang, Zhiyu and Xu, Ruoyu and Fu, Jingyun and Xu, Peng and Wang, Shaohong and Yang, Zhihao and Pu, Tianyu and Liu, Eryun},
  title     = {{CVFusion}: Cross-View Fusion of {4D} Radar and Camera for {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28188--28197},
}
Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation: Shengfang Zhai,

Jiajun Li,

Yue Liu,

Huanran Chen,

Zhihua Tian,

Wenjie Qu,

Qingni Shen,

Ruoxi Jia,

Yinpeng Dong,

Jiaheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_ICCV,
  author    = {Zhai, Shengfang and Li, Jiajun and Liu, Yue and Chen, Huanran and Tian, Zhihua and Qu, Wenjie and Shen, Qingni and Jia, Ruoxi and Dong, Yinpeng and Zhang, Jiaheng},
  title     = {Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15182--15193},
}
MoMa-Kitchen: A 100K+ Benchmark for Affordance-Grounded Last-Mile Navigation in Mobile Manipulation: Pingrui Zhang,

Xianqiang Gao,

Yuhan Wu,

Kehui Liu,

Dong Wang,

Zhigang Wang,

Bin Zhao,

Yan Ding,

Xuelong Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Pingrui and Gao, Xianqiang and Wu, Yuhan and Liu, Kehui and Wang, Dong and Wang, Zhigang and Zhao, Bin and Ding, Yan and Li, Xuelong},
  title     = {{MoMa-Kitchen}: A {100K+} Benchmark for Affordance-Grounded Last-Mile Navigation in Mobile Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6315--6326},
}
ViT-Linearizer: Distilling Quadratic Knowledge into Linear-Time Vision Models: Guoyizhe Wei,

Rama Chellappa; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Guoyizhe and Chellappa, Rama},
  title     = {{ViT-Linearizer}: Distilling Quadratic Knowledge into Linear-Time Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20737--20747},
}
RobustSplat: Decoupling Densification and Dynamics for Transient-Free 3DGS: Chuanyu Fu,

Yuqi Zhang,

Kunbin Yao,

Guanying Chen,

Yuan Xiong,

Chuan Huang,

Shuguang Cui,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Chuanyu and Zhang, Yuqi and Yao, Kunbin and Chen, Guanying and Xiong, Yuan and Huang, Chuan and Cui, Shuguang and Cao, Xiaochun},
  title     = {{RobustSplat}: Decoupling Densification and Dynamics for Transient-Free {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27126--27136},
}
Generalized Deep Multi-view Clustering via Causal Learning with Partially Aligned Cross-view Correspondence: Xihong Yang,

Siwei Wang,

Jiaqi Jin,

Fangdi Wang,

Tianrui Liu,

Yueming Jin,

Xinwang Liu,

En Zhu,

Kunlun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xihong and Wang, Siwei and Jin, Jiaqi and Wang, Fangdi and Liu, Tianrui and Jin, Yueming and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Generalized Deep Multi-view Clustering via Causal Learning with Partially Aligned Cross-view Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1990--1999},
}
MeasureXpert: Automatic Anthropometric Measurement Extraction from Two Unregistered, Partial, Posed, and Dressed Body Scans: Ran Zhao,

Xinxin Dai,

Pengpeng Hu,

Vasile Palade,

Adrian Munteanu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Ran and Dai, Xinxin and Hu, Pengpeng and Palade, Vasile and Munteanu, Adrian},
  title     = {{MeasureXpert}: Automatic Anthropometric Measurement Extraction from Two Unregistered, Partial, Posed, and Dressed Body Scans},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9605--9615},
}
FedMeNF: Privacy-Preserving Federated Meta-Learning for Neural Fields: Junhyeog Yun,

Minui Hong,

Gunhee Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_ICCV,
  author    = {Yun, Junhyeog and Hong, Minui and Kim, Gunhee},
  title     = {{FedMeNF}: Privacy-Preserving Federated Meta-Learning for Neural Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2161--2171},
}
MeshLLM: Empowering Large Language Models to Progressively Understand and Generate 3D Mesh: Shuangkang Fang,

I-Chao Shen,

Yufeng Wang,

Yi-Hsuan Tsai,

Yi Yang,

Shuchang Zhou,

Wenrui Ding,

Takeo Igarashi,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shuangkang and Shen, I-Chao and Wang, Yufeng and Tsai, Yi-Hsuan and Yang, Yi and Zhou, Shuchang and Ding, Wenrui and Igarashi, Takeo and Yang, Ming-Hsuan},
  title     = {{MeshLLM}: Empowering Large Language Models to Progressively Understand and Generate {3D} Mesh},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14061--14072},
}
Split-and-Combine: Enhancing Style Augmentation for Single Domain Generalization: Zhen Zhang,

Shuai Yang,

Qianlong Dang,

Zhize Wu,

Lichuan Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhen and Yang, Shuai and Dang, Qianlong and Wu, Zhize and Gu, Lichuan},
  title     = {Split-and-Combine: Enhancing Style Augmentation for Single Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15616--15625},
}
FOLDER: Accelerating Multi-Modal Large Language Models with Enhanced Performance: Haicheng Wang,

Zhemeng Yu,

Gabriele Spadaro,

Chen Ju,

Victor Quétu,

Shuai Xiao,

Enzo Tartaglione; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haicheng and Yu, Zhemeng and Spadaro, Gabriele and Ju, Chen and Qu{\'e}tu, Victor and Xiao, Shuai and Tartaglione, Enzo},
  title     = {{FOLDER}: Accelerating Multi-Modal Large Language Models with Enhanced Performance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23614--23625},
}
A Hyperdimensional One Place Signature to Represent Them All: Stackable Descriptors For Visual Place Recognition: Connor Malone,

Somayeh Hussaini,

Tobias Fischer,

Michael Milford; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Malone_2025_ICCV,
  author    = {Malone, Connor and Hussaini, Somayeh and Fischer, Tobias and Milford, Michael},
  title     = {A Hyperdimensional One Place Signature to Represent Them All: Stackable Descriptors For Visual Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9822--9833},
}
Large Multi-modal Models Can Interpret Features in Large Multi-modal Models: Kaichen Zhang,

Yifei Shen,

Bo Li,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kaichen and Shen, Yifei and Li, Bo and Liu, Ziwei},
  title     = {Large Multi-modal Models Can Interpret Features in Large Multi-modal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3650--3661},
}
Progressive Artwork Outpainting via Latent Diffusion Models: Dae-Young Song,

Jung-Jae Yu,

Donghyeon Cho; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Dae-Young and Yu, Jung-Jae and Cho, Donghyeon},
  title     = {Progressive Artwork Outpainting via Latent Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15405--15415},
}
Are VLMs Ready for Autonomous Driving? An Empirical Study from the Reliability, Data and Metric Perspectives: Shaoyuan Xie,

Lingdong Kong,

Yuhao Dong,

Chonghao Sima,

Wenwei Zhang,

Qi Alfred Chen,

Ziwei Liu,

Liang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Shaoyuan and Kong, Lingdong and Dong, Yuhao and Sima, Chonghao and Zhang, Wenwei and Chen, Qi Alfred and Liu, Ziwei and Pan, Liang},
  title     = {Are {VLMs} Ready for Autonomous Driving? An Empirical Study from the Reliability, Data and Metric Perspectives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6585--6597},
}
Learn2Synth: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation: Xiaoling Hu,

Xiangrui Zeng,

Oula Puonti,

Juan Eugenio Iglesias,

Bruce Fischl,

Yaël Balbastre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xiaoling and Zeng, Xiangrui and Puonti, Oula and Iglesias, Juan Eugenio and Fischl, Bruce and Balbastre, Ya{\"e}l},
  title     = {{Learn2Synth}: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20368--20378},
}
Deep Incomplete Multi-view Clustering with Distribution Dual-Consistency Recovery Guidance: Jiaqi Jin,

Siwei Wang,

Zhibin Dong,

Xihong Yang,

Xinwang Liu,

En Zhu,

Kunlun He; [pdf] [arXiv]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Jiaqi and Wang, Siwei and Dong, Zhibin and Yang, Xihong and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Deep Incomplete Multi-view Clustering with Distribution Dual-Consistency Recovery Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1016--1026},
}
3D Mesh Editing using Masked LRMs: Will Gao,

Dilin Wang,

Yuchen Fan,

Aljaz Bozic,

Tuur Stuyck,

Zhengqin Li,

Zhao Dong,

Rakesh Ranjan,

Nikolaos Sarafianos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Will and Wang, Dilin and Fan, Yuchen and Bozic, Aljaz and Stuyck, Tuur and Li, Zhengqin and Dong, Zhao and Ranjan, Rakesh and Sarafianos, Nikolaos},
  title     = {{3D} Mesh Editing using Masked {LRMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7154--7165},
}
Acknowledging Focus Ambiguity in Visual Questions: Chongyan Chen,

Yu-Yun Tseng,

Zhuoheng Li,

Anush Venkatesh,

Danna Gurari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chongyan and Tseng, Yu-Yun and Li, Zhuoheng and Venkatesh, Anush and Gurari, Danna},
  title     = {Acknowledging Focus Ambiguity in Visual Questions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1228--1238},
}
DRaM-LHM: A Quaternion Framework for Iterative Camera Pose Estimation: Chen Lin,

Weizhi Du,

Zhixiang Min,

Baochen She,

Enrique Dunn,

Sonya M. Hanson; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chen and Du, Weizhi and Min, Zhixiang and She, Baochen and Dunn, Enrique and Hanson, Sonya M.},
  title     = {{DRaM-LHM}: A Quaternion Framework for Iterative Camera Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6447--6455},
}
Bolt3D: Generating 3D Scenes in Seconds: Stanislaw Szymanowicz,

Jason Y. Zhang,

Pratul Srinivasan,

Ruiqi Gao,

Arthur Brussee,

Aleksander Holynski,

Ricardo Martin-Brualla,

Jonathan T. Barron,

Philipp Henzler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2025_ICCV,
  author    = {Szymanowicz, Stanislaw and Zhang, Jason Y. and Srinivasan, Pratul and Gao, Ruiqi and Brussee, Arthur and Holynski, Aleksander and Martin-Brualla, Ricardo and Barron, Jonathan T. and Henzler, Philipp},
  title     = {{Bolt3D}: Generating {3D} Scenes in Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24846--24857}
}
Revisiting Image Fusion for Multi-Illuminant White-Balance Correction: David Serrano-Lozano,

Aditya Arora,

Luis Herranz,

Konstantinos G. Derpanis,

Michael S. Brown,

Javier Vazquez-Corral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Serrano-Lozano_2025_ICCV,
  author    = {Serrano-Lozano, David and Arora, Aditya and Herranz, Luis and Derpanis, Konstantinos G. and Brown, Michael S. and Vazquez-Corral, Javier},
  title     = {Revisiting Image Fusion for Multi-Illuminant White-Balance Correction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8275--8284}
}
Combinative Matching for Geometric Shape Assembly: Nahyuk Lee,

Juhong Min,

Junhong Lee,

Chunghyun Park,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Nahyuk and Min, Juhong and Lee, Junhong and Park, Chunghyun and Cho, Minsu},
  title     = {Combinative Matching for Geometric Shape Assembly},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9540--9549}
}
Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation: Joëlle Hanna,

Damian Borth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hanna_2025_ICCV,
  author    = {Hanna, Jo{\"e}lle and Borth, Damian},
  title     = {Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23763--23772}
}
DAViD: Data-efficient and Accurate Vision Models from Synthetic Data: Fatemeh Saleh,

Sadegh Aliakbarian,

Charlie Hewitt,

Lohit Petikam,

Xian Xiao,

Antonio Criminisi,

Thomas J. Cashman,

Tadas Baltrusaitis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saleh_2025_ICCV,
  author    = {Saleh, Fatemeh and Aliakbarian, Sadegh and Hewitt, Charlie and Petikam, Lohit and Xiao, Xian and Criminisi, Antonio and Cashman, Thomas J. and Baltrusaitis, Tadas},
  title     = {{DAViD}: Data-efficient and Accurate Vision Models from Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5348--5358}
}
Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization: Xu Zheng,

Yuanhuiyi Lyu,

Lutao Jiang,

Danda Pani Paudel,

Luc Van Gool,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Xu and Lyu, Yuanhuiyi and Jiang, Lutao and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming},
  title     = {Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21166--21176}
}
ETA: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models: Shadi Hamdan,

Chonghao Sima,

Zetong Yang,

Hongyang Li,

Fatma Guney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hamdan_2025_ICCV,
  author    = {Hamdan, Shadi and Sima, Chonghao and Yang, Zetong and Li, Hongyang and Guney, Fatma},
  title     = {{ETA}: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26529--26538}
}
SCORE: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation: Shiqi Huang,

Shuting He,

Huaiyuan Qin,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Shiqi and He, Shuting and Qin, Huaiyuan and Wen, Bihan},
  title     = {{SCORE}: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12559--12569}
}
LEGO-Maker: A Semantic-Driven Algorithm for Text-to-3D Generation: Yifei Zhang,

Lei Chen; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yifei and Chen, Lei},
  title     = {{LEGO-Maker}: A Semantic-Driven Algorithm for Text-to-{3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15127--15136}
}
Learning Interpretable Queries for Explainable Image Classification with Information Pursuit: Stefan Kolek,

Aditya Chattopadhyay,

Kwan Ho Ryan Chan,

Hector Andrade-Loarca,

Gitta Kutyniok,

René Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kolek_2025_ICCV,
  author    = {Kolek, Stefan and Chattopadhyay, Aditya and Chan, Kwan Ho Ryan and Andrade-Loarca, Hector and Kutyniok, Gitta and Vidal, Ren{\'e}},
  title     = {Learning Interpretable Queries for Explainable Image Classification with Information Pursuit},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3947--3956}
}
LoRA-FAIR: Federated LoRA Fine-Tuning with Aggregation and Initialization Refinement: Jieming Bian,

Lei Wang,

Letian Zhang,

Jie Xu; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Jieming and Wang, Lei and Zhang, Letian and Xu, Jie},
  title     = {{LoRA-FAIR}: Federated {LoRA} Fine-Tuning with Aggregation and Initialization Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3737--3746}
}
DiffDoctor: Diagnosing Image Diffusion Models Before Treating: Yiyang Wang,

Xi Chen,

Xiaogang Xu,

Sihui Ji,

Yu Liu,

Yujun Shen,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Ji, Sihui and Liu, Yu and Shen, Yujun and Zhao, Hengshuang},
  title     = {{DiffDoctor}: Diagnosing Image Diffusion Models Before Treating},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18917--18926}
}
AIRA: Activation-Informed Low-Rank Adaptation for Large Models: Lujun Li,

Dezhi Li,

Cheng Lin,

Wei Li,

Wei Xue,

Sirui Han,

Yike Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Lujun and Li, Dezhi and Lin, Cheng and Li, Wei and Xue, Wei and Han, Sirui and Guo, Yike},
  title     = {{AIRA}: Activation-Informed Low-Rank Adaptation for Large Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1729--1739}
}
Understanding Personal Concept in Open-Vocabulary Semantic Segmentation: Sunghyun Park,

Jungsoo Lee,

Shubhankar Borse,

Munawar Hayat,

Sungha Choi,

Kyuwoong Hwang,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sunghyun and Lee, Jungsoo and Borse, Shubhankar and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih},
  title     = {Understanding Personal Concept in Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19957--19966}
}
4D Visual Pre-training for Robot Learning: Chengkai Hou,

Yanjie Ze,

Yankai Fu,

Zeyu Gao,

Songbo Hu,

Yue Yu,

Shanghang Zhang,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Chengkai and Ze, Yanjie and Fu, Yankai and Gao, Zeyu and Hu, Songbo and Yu, Yue and Zhang, Shanghang and Xu, Huazhe},
  title     = {{4D} Visual Pre-training for Robot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8451--8461}
}
SemTalk: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis: Xiangyue Zhang,

Jianfang Li,

Jiaxu Zhang,

Ziqiang Dang,

Jianqiang Ren,

Liefeng Bo,

Zhigang Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiangyue and Li, Jianfang and Zhang, Jiaxu and Dang, Ziqiang and Ren, Jianqiang and Bo, Liefeng and Tu, Zhigang},
  title     = {{SemTalk}: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13761--13771}
}
Textured 3D Regenerative Morphing with 3D Diffusion Prior: Songlin Yang,

Yushi Lan,

Honghua Chen,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Songlin and Lan, Yushi and Chen, Honghua and Pan, Xingang},
  title     = {Textured {3D} Regenerative Morphing with {3D} Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15159--15170}
}
LONG3R: Long Sequence Streaming 3D Reconstruction: Zhuoguang Chen,

Minghui Qin,

Tianyuan Yuan,

Zhe Liu,

Hang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhuoguang and Qin, Minghui and Yuan, Tianyuan and Liu, Zhe and Zhao, Hang},
  title     = {{LONG3R}: Long Sequence Streaming {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5273--5284}
}
DreamActor-M1: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance: Yuxuan Luo,

Zhengkun Rong,

Lizhen Wang,

Longhao Zhang,

Tianshu Hu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yuxuan and Rong, Zhengkun and Wang, Lizhen and Zhang, Longhao and Hu, Tianshu},
  title     = {{DreamActor-M1}: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11036--11046}
}
Blind2Sound: Self-Supervised Image Denoising without Residual Noise: Jiazheng Liu,

Zejin Wang,

Bohao Chen,

Hua Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiazheng and Wang, Zejin and Chen, Bohao and Han, Hua},
  title     = {{Blind2Sound}: Self-Supervised Image Denoising without Residual Noise},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12937--12946}
}
Sparsity Outperforms Low-Rank Projections in Few-Shot Adaptation: Nairouz Mrabah,

Nicolas Richet,

Ismail Ben Ayed,

Eric Granger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mrabah_2025_ICCV,
  author    = {Mrabah, Nairouz and Richet, Nicolas and Ben Ayed, Ismail and Granger, Eric},
  title     = {Sparsity Outperforms Low-Rank Projections in Few-Shot Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3143--3152}
}
Hyper-Depth: Hypergraph-based Multi-Scale Representation Fusion for Monocular Depth Estimation: Lin Bie,

Siqi Li,

Yifan Feng,

Yue Gao; [pdf]
[bibtex]
@InProceedings{Bie_2025_ICCV,
  author    = {Bie, Lin and Li, Siqi and Feng, Yifan and Gao, Yue},
  title     = {{Hyper-Depth}: Hypergraph-based Multi-Scale Representation Fusion for Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5081--5090}
}
PVMamba: Parallelizing Vision Mamba via Dynamic State Aggregation: Fei Xie,

Zhongdao Wang,

Weijia Zhang,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Fei and Wang, Zhongdao and Zhang, Weijia and Ma, Chao},
  title     = {{PVMamba}: Parallelizing Vision {Mamba} via Dynamic State Aggregation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10218--10228}
}
Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models: Jieun Kim,

Jinmyeong Kim,

Yoonji Kim,

Sung-Bae Cho; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jieun and Kim, Jinmyeong and Kim, Yoonji and Cho, Sung-Bae},
  title     = {Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20572--20581}
}
SMARTIES: Spectrum-Aware Multi-Sensor Auto-Encoder for Remote Sensing Images: Gencer Sumbul,

Chang Xu,

Emanuele Dalsasso,

Devis Tuia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sumbul_2025_ICCV,
  author    = {Sumbul, Gencer and Xu, Chang and Dalsasso, Emanuele and Tuia, Devis},
  title     = {{SMARTIES}: Spectrum-Aware Multi-Sensor Auto-Encoder for Remote Sensing Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5569--5578}
}
SynFER: Towards Boosting Facial Expression Recognition with Synthetic Data: Xilin He,

Cheng Luo,

Xiaole Xian,

Bing Li,

Muhammad Haris Khan,

Zongyuan Ge,

Weicheng Xie,

Siyang Song,

Linlin Shen,

Bernard Ghanem,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Xilin and Luo, Cheng and Xian, Xiaole and Li, Bing and Khan, Muhammad Haris and Ge, Zongyuan and Xie, Weicheng and Song, Siyang and Shen, Linlin and Ghanem, Bernard and Yue, Xiangyu},
  title     = {{SynFER}: Towards Boosting Facial Expression Recognition with Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10184--10195}
}
Boosting Multimodal Learning via Disentangled Gradient Learning: Shicai Wei,

Chunbo Luo,

Yang Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shicai and Luo, Chunbo and Luo, Yang},
  title     = {Boosting Multimodal Learning via Disentangled Gradient Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22879--22888}
}
Generating Physically Stable and Buildable Brick Structures from Text: Ava Pun,

Kangle Deng,

Ruixuan Liu,

Deva Ramanan,

Changliu Liu,

Jun-Yan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pun_2025_ICCV,
  author    = {Pun, Ava and Deng, Kangle and Liu, Ruixuan and Ramanan, Deva and Liu, Changliu and Zhu, Jun-Yan},
  title     = {Generating Physically Stable and Buildable Brick Structures from Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14798--14809}
}
Bootstrapping Grounded Chain-of-Thought in Multimodal LLMs for Data-Efficient Model Adaptation: Jiaer Xia,

Bingkui Tong,

Yuhang Zang,

Rui Shao,

Kaiyang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Jiaer and Tong, Bingkui and Zang, Yuhang and Shao, Rui and Zhou, Kaiyang},
  title     = {Bootstrapping Grounded Chain-of-Thought in Multimodal {LLMs} for Data-Efficient Model Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {208--217}
}
DisTime: Distribution-based Time Representation for Video Large Language Models: Yingsen Zeng,

Zepeng Huang,

Yujie Zhong,

Chengjian Feng,

Jie Hu,

Lin Ma,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Yingsen and Huang, Zepeng and Zhong, Yujie and Feng, Chengjian and Hu, Jie and Ma, Lin and Liu, Yang},
  title     = {{DisTime}: Distribution-based Time Representation for Video Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21961--21971}
}
Engage for All: Making Ordinary Image Descriptions Appealing Again!: Yuyan Chen,

Yifan Jiang,

Li Zhou,

Jinghan Cao,

Yu Guan,

Ming Yang,

Qingpei Guo; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuyan and Jiang, Yifan and Zhou, Li and Cao, Jinghan and Guan, Yu and Yang, Ming and Guo, Qingpei},
  title     = {Engage for All: Making Ordinary Image Descriptions Appealing Again!},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19342--19352}
}
QuantCache: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation: Junyi Wu,

Zhiteng Li,

Zheng Hui,

Yulun Zhang,

Linghe Kong,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Junyi and Li, Zhiteng and Hui, Zheng and Zhang, Yulun and Kong, Linghe and Yang, Xiaokang},
  title     = {{QuantCache}: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15035--15044}
}
DH-FaceVid-1K: A Large-Scale High-Quality Dataset for Face Video Generation: Donglin Di,

He Feng,

Wenzhang Sun,

Yongjia Ma,

Hao Li,

Wei Chen,

Lei Fan,

Tonghua Su,

Xun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Di_2025_ICCV,
  author    = {Di, Donglin and Feng, He and Sun, Wenzhang and Ma, Yongjia and Li, Hao and Chen, Wei and Fan, Lei and Su, Tonghua and Yang, Xun},
  title     = {{DH-FaceVid-1K}: A Large-Scale High-Quality Dataset for Face Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12124--12134}
}
Instruction-Grounded Visual Projectors for Continual Learning of Generative Vision-Language Models: Hyundong Jin,

Hyung Jin Chang,

Eunwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Hyundong and Chang, Hyung Jin and Kim, Eunwoo},
  title     = {Instruction-Grounded Visual Projectors for Continual Learning of Generative Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3466--3476}
}
COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation: Sanghyun Jo,

Seo Jin Lee,

Seungwoo Lee,

Seohyung Hong,

Hyungseok Seo,

Kyungsu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Sanghyun and Lee, Seo Jin and Lee, Seungwoo and Hong, Seohyung and Seo, Hyungseok and Kim, Kyungsu},
  title     = {{COIN}: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20324--20335}
}
Proactive Scene Decomposition and Reconstruction: Baicheng Li,

Zike Yan,

Dong Wu,

Hongbin Zha; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Baicheng and Yan, Zike and Wu, Dong and Zha, Hongbin},
  title     = {Proactive Scene Decomposition and Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9780--9789}
}
Estimating 2D Camera Motion with Hybrid Motion Basis: Haipeng Li,

Tianhao Zhou,

Zhanglei Yang,

Yi Wu,

Yan Chen,

Zijing Mao,

Shen Cheng,

Bing Zeng,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Haipeng and Zhou, Tianhao and Yang, Zhanglei and Wu, Yi and Chen, Yan and Mao, Zijing and Cheng, Shen and Zeng, Bing and Liu, Shuaicheng},
  title     = {Estimating {2D} Camera Motion with Hybrid Motion Basis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7624--7633}
}
Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding: Xiaojie Zhang,

Yuanfei Wang,

Ruihai Wu,

Kunqi Xu,

Yu Li,

Liuyu Xiang,

Hao Dong,

Zhaofeng He; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiaojie and Wang, Yuanfei and Wu, Ruihai and Xu, Kunqi and Li, Yu and Xiang, Liuyu and Dong, Hao and He, Zhaofeng},
  title     = {Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13032--13042}
}
Predict-Optimize-Distill: A Self-Improving Cycle for 4D Object Understanding: Mingxuan Wu,

Huang Huang,

Justin Kerr,

Chung Min Kim,

Anthony Zhang,

Brent Yi,

Angjoo Kanazawa; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Mingxuan and Huang, Huang and Kerr, Justin and Kim, Chung Min and Zhang, Anthony and Yi, Brent and Kanazawa, Angjoo},
  title     = {{Predict-Optimize-Distill}: A Self-Improving Cycle for {4D} Object Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6575--6584}
}
LHM: Large Animatable Human Reconstruction Model for Single Image to 3D in Seconds: Lingteng Qiu,

Xiaodong Gu,

Peihao Li,

Qi Zuo,

Weichao Shen,

Junfei Zhang,

Kejie Qiu,

Weihao Yuan,

Guanying Chen,

Zilong Dong,

Liefeng Bo; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Lingteng and Gu, Xiaodong and Li, Peihao and Zuo, Qi and Shen, Weichao and Zhang, Junfei and Qiu, Kejie and Yuan, Weihao and Chen, Guanying and Dong, Zilong and Bo, Liefeng},
  title     = {{LHM}: Large Animatable Human Reconstruction Model for Single Image to {3D} in Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14184--14194}
}
Fine-grained Spatiotemporal Grounding on Egocentric Videos: Shuo Liang,

Yiwu Zhong,

Zi-Yuan Hu,

Yeyao Tao,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Shuo and Zhong, Yiwu and Hu, Zi-Yuan and Tao, Yeyao and Wang, Liwei},
  title     = {Fine-grained Spatiotemporal Grounding on Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9385--9395}
}
3D Test-time Adaptation via Graph Spectral Driven Point Shift: Xin Wei,

Qin Yang,

Yijie Fang,

Mingrui Zhu,

Nannan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xin and Yang, Qin and Fang, Yijie and Zhu, Mingrui and Wang, Nannan},
  title     = {{3D} Test-time Adaptation via Graph Spectral Driven Point Shift},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26762--26771}
}
An Empirical Study of Autoregressive Pre-training from Videos: Jathushan Rajasegaran,

Ilija Radosavovic,

Rahul Ravishankar,

Yossi Gandelsman,

Christoph Feichtenhofer,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rajasegaran_2025_ICCV,
  author    = {Rajasegaran, Jathushan and Radosavovic, Ilija and Ravishankar, Rahul and Gandelsman, Yossi and Feichtenhofer, Christoph and Malik, Jitendra},
  title     = {An Empirical Study of Autoregressive Pre-training from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19108--19118}
}
Latent Diffusion Models with Masked AutoEncoders: Junho Lee,

Jeongwoo Shin,

Hyungwook Choi,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Junho and Shin, Jeongwoo and Choi, Hyungwook and Lee, Joonseok},
  title     = {Latent Diffusion Models with Masked {AutoEncoders}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17422--17431}
}
PlaceIt3D: Language-Guided Object Placement in Real 3D Scenes: Ahmed Abdelreheem,

Filippo Aleotti,

Jamie Watson,

Zawar Qureshi,

Abdelrahman Eldesokey,

Peter Wonka,

Gabriel Brostow,

Sara Vicente,

Guillermo Garcia-Hernando; [pdf] [arXiv]
[bibtex]
@InProceedings{Abdelreheem_2025_ICCV,
  author    = {Abdelreheem, Ahmed and Aleotti, Filippo and Watson, Jamie and Qureshi, Zawar and Eldesokey, Abdelrahman and Wonka, Peter and Brostow, Gabriel and Vicente, Sara and Garcia-Hernando, Guillermo},
  title     = {{PlaceIt3D}: Language-Guided Object Placement in Real {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6645--6655}
}
TR-PTS: Task-Relevant Parameter and Token Selection for Efficient Tuning: Siqi Luo,

Haoran Yang,

Yi Xin,

Mingyang Yi,

Guangyang Wu,

Guangtao Zhai,

Xiaohong Liu; [pdf]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Siqi and Yang, Haoran and Xin, Yi and Yi, Mingyang and Wu, Guangyang and Zhai, Guangtao and Liu, Xiaohong},
  title     = {{TR-PTS}: Task-Relevant Parameter and Token Selection for Efficient Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4360--4369}
}
StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning: Chuxin Wang,

Yixin Zha,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chuxin and Zha, Yixin and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{StruMamba3D}: Exploring Structural {Mamba} for Self-supervised Point Cloud Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28546--28555}
}
Open-World Skill Discovery from Unsegmented Demonstration Videos: Jingwen Deng,

Zihao Wang,

Shaofei Cai,

Anji Liu,

Yitao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Jingwen and Wang, Zihao and Cai, Shaofei and Liu, Anji and Liang, Yitao},
  title     = {Open-World Skill Discovery from Unsegmented Demonstration Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10708--10718}
}
Staining and Locking Computer Vision Models Without Retraining: Oliver J. Sutton,

Qinghua Zhou,

George Leete,

Alexander N. Gorban,

Ivan Y. Tyukin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sutton_2025_ICCV,
  author    = {Sutton, Oliver J. and Zhou, Qinghua and Leete, George and Gorban, Alexander N. and Tyukin, Ivan Y.},
  title     = {Staining and Locking Computer Vision Models Without Retraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2346--2355}
}
Consistent Time-of-Flight Depth Denoising via Graph-Informed Geometric Attention: Weida Wang,

Changyong He,

Jin Zeng,

Di Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weida and He, Changyong and Zeng, Jin and Qiu, Di},
  title     = {Consistent Time-of-Flight Depth Denoising via Graph-Informed Geometric Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5188--5197}
}
Can Generative Geospatial Diffusion Models Excel as Discriminative Geospatial Foundation Models?: Yuru Jia,

Valerio Marsocci,

Ziyang Gong,

Xue Yang,

Maarten Vergauwen,

Andrea Nascetti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Yuru and Marsocci, Valerio and Gong, Ziyang and Yang, Xue and Vergauwen, Maarten and Nascetti, Andrea},
  title     = {Can Generative Geospatial Diffusion Models Excel as Discriminative Geospatial Foundation Models?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8429--8440}
}
End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation: Liwei Wang,

Yanduo Zhang,

Tao Lu,

Fang Liu,

Huiqin Zhang,

Jiayi Ma,

Huabing Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Liwei and Zhang, Yanduo and Lu, Tao and Liu, Fang and Zhang, Huiqin and Ma, Jiayi and Zhou, Huabing},
  title     = {End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17729--17738}
}
AJAHR: Amputated Joint Aware 3D Human Mesh Recovery: Hyunjin Cho,

Giyun Choi,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Hyunjin and Choi, Giyun and Choi, Jongwon},
  title     = {{AJAHR}: Amputated Joint Aware {3D} Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7925--7935}
}
Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition: Jeonghyeok Do,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2025_ICCV,
  author    = {Do, Jeonghyeok and Kim, Munchurl},
  title     = {Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12757--12768}
}
Fish2Mesh Transformer: 3D Human Mesh Recovery from Egocentric Vision: Tianma Shen,

Aditya Puranik,

James Vong,

Vrushabh Deogirikar,

Ryan Fell,

Julianna Dietrich,

Maria Kyrarini,

Christopher Kitts,

David C. Jeong; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Tianma and Puranik, Aditya and Vong, James and Deogirikar, Vrushabh and Fell, Ryan and Dietrich, Julianna and Kyrarini, Maria and Kitts, Christopher and Jeong, David C.},
  title     = {{Fish2Mesh} Transformer: {3D} Human Mesh Recovery from Egocentric Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6498--6507}
}
Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation: Andrea Simonelli,

Norman Müller,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Simonelli_2025_ICCV,
  author    = {Simonelli, Andrea and M{\"u}ller, Norman and Kontschieder, Peter},
  title     = {{Easy3D}: A Simple Yet Effective Method for {3D} Interactive Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24707--24716}
}
DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding: Xiaoyi Bao,

Chenwei Xie,

Hao Tang,

Tingyu Weng,

Xiaofeng Wang,

Yun Zheng,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Xiaoyi and Xie, Chenwei and Tang, Hao and Weng, Tingyu and Wang, Xiaofeng and Zheng, Yun and Wang, Xingang},
  title     = {{DynImg}: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23678--23688}
}
Interpretable Zero-Shot Learning with Locally-Aligned Vision-Language Model: Shiming Chen,

Bowen Duan,

Salman Khan,

Fahad Shahbaz Khan; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Shiming and Duan, Bowen and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {Interpretable Zero-Shot Learning with Locally-Aligned Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {478--487},
}
MixANT: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation: Syed Talal Wasim,

Hamid Suleman,

Olga Zatsarynna,

Muzammal Naseer,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasim_2025_ICCV,
  author    = {Wasim, Syed Talal and Suleman, Hamid and Zatsarynna, Olga and Naseer, Muzammal and Gall, Juergen},
  title     = {{MixANT}: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14613--14622},
}
MaGS: Reconstructing and Simulating Dynamic 3D Objects with Mesh-adsorbed Gaussian Splatting: Shaojie Ma,

Yawei Luo,

Wei Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Shaojie and Luo, Yawei and Yang, Wei and Yang, Yi},
  title     = {{MaGS}: Reconstructing and Simulating Dynamic {3D} Objects with Mesh-adsorbed {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8745--8755},
}
SceneMI: Motion In-betweening for Modeling Human-Scene Interaction: Inwoo Hwang,

Bing Zhou,

Young Min Kim,

Jian Wang,

Chuan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inwoo and Zhou, Bing and Kim, Young Min and Wang, Jian and Guo, Chuan},
  title     = {{SceneMI}: Motion In-betweening for Modeling Human-Scene Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6034--6045},
}
SMoLoRA: Exploring and Defying Dual Catastrophic Forgetting in Continual Visual Instruction Tuning: Ziqi Wang,

Chang Che,

Qi Wang,

Yangyang Li,

Zenglin Shi,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziqi and Che, Chang and Wang, Qi and Li, Yangyang and Shi, Zenglin and Wang, Meng},
  title     = {{SMoLoRA}: Exploring and Defying Dual Catastrophic Forgetting in Continual Visual Instruction Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {177--186},
}
Self-Supervised Sparse Sensor Fusion for Long Range Perception: Edoardo Palladin,

Samuel Brucker,

Filippo Ghilotti,

Praveen Narayanan,

Mario Bijelic,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Palladin_2025_ICCV,
  author    = {Palladin, Edoardo and Brucker, Samuel and Ghilotti, Filippo and Narayanan, Praveen and Bijelic, Mario and Heide, Felix},
  title     = {Self-Supervised Sparse Sensor Fusion for Long Range Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27498--27509},
}
GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization: Shaowen Tong,

Zimin Xia,

Alexandre Alahi,

Xuming He,

Yujiao Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Shaowen and Xia, Zimin and Alahi, Alexandre and He, Xuming and Shi, Yujiao},
  title     = {{GeoDistill}: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25357--25366},
}
Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering: Siddharth Tourani,

Jayaram Reddy,

Akash Kumbar,

Satyajit Tourani,

Nishant Goyal,

Madhava Krishna,

N Dinesh Reddy,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Tourani_2025_ICCV,
  author    = {Tourani, Siddharth and Reddy, Jayaram and Kumbar, Akash and Tourani, Satyajit and Goyal, Nishant and Krishna, Madhava and Reddy, N Dinesh and Khan, Muhammad Haris},
  title     = {Leveraging {2D} Priors and {SDF} Guidance for Urban Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29051--29063},
}
SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition: Yongkun Du,

Zhineng Chen,

Hongtao Xie,

Caiyan Jia,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yongkun and Chen, Zhineng and Xie, Hongtao and Jia, Caiyan and Jiang, Yu-Gang},
  title     = {{SVTRv2}: {CTC} Beats Encoder-Decoder Models in Scene Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20147--20156},
}
Blind Noisy Image Deblurring Using Residual Guidance Strategy: Heyan Liu,

Jianing Sun,

Jun Liu,

Xi-Le Zhao,

Tingting Wu,

Tieyong Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Heyan and Sun, Jianing and Liu, Jun and Zhao, Xi-Le and Wu, Tingting and Zeng, Tieyong},
  title     = {Blind Noisy Image Deblurring Using Residual Guidance Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11016--11025},
}
DeGauss: Dynamic-Static Decomposition with Gaussian Splatting for Distractor-free 3D Reconstruction: Rui Wang,

Quentin Lohmeyer,

Mirko Meboldt,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Rui and Lohmeyer, Quentin and Meboldt, Mirko and Tang, Siyu},
  title     = {{DeGauss}: Dynamic-Static Decomposition with {Gaussian} Splatting for Distractor-free {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6294--6303},
}
Time-Aware Auto White Balance in Mobile Photography: Mahmoud Afifi,

Luxi Zhao,

Abhijith Punnappurath,

Mohamed A. Abdelsalam,

Ran Zhang,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Afifi_2025_ICCV,
  author    = {Afifi, Mahmoud and Zhao, Luxi and Punnappurath, Abhijith and Abdelsalam, Mohamed A. and Zhang, Ran and Brown, Michael S.},
  title     = {Time-Aware Auto White Balance in Mobile Photography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5038--5047},
}
Unlocking the Potential of Diffusion Priors in Blind Face Restoration: Yunqi Miao,

Zhiyu Qu,

Mingqi Gao,

Changrui Chen,

Jifei Song,

Jungong Han,

Jiankang Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Yunqi and Qu, Zhiyu and Gao, Mingqi and Chen, Changrui and Song, Jifei and Han, Jungong and Deng, Jiankang},
  title     = {Unlocking the Potential of Diffusion Priors in Blind Face Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13471--13480},
}
InstaScene: Towards Complete 3D Instance Decomposition and Reconstruction from Cluttered Scenes: Zesong Yang,

Bangbang Yang,

Wenqi Dong,

Chenxuan Cao,

Liyuan Cui,

Yuewen Ma,

Zhaopeng Cui,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zesong and Yang, Bangbang and Dong, Wenqi and Cao, Chenxuan and Cui, Liyuan and Ma, Yuewen and Cui, Zhaopeng and Bao, Hujun},
  title     = {{InstaScene}: Towards Complete {3D} Instance Decomposition and Reconstruction from Cluttered Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7771--7781},
}
Improved Noise Schedule for Diffusion Training: Tiankai Hang,

Shuyang Gu,

Jianmin Bao,

Fangyun Wei,

Dong Chen,

Xin Geng,

Baining Guo; [pdf] [supp]
[bibtex]
@InProceedings{Hang_2025_ICCV,
  author    = {Hang, Tiankai and Gu, Shuyang and Bao, Jianmin and Wei, Fangyun and Chen, Dong and Geng, Xin and Guo, Baining},
  title     = {Improved Noise Schedule for Diffusion Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4796--4806},
}
ProbRes: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition: Sanjoy Kundu,

Shanmukha Vellamcheti,

Sathyanarayanan N. Aakur; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2025_ICCV,
  author    = {Kundu, Sanjoy and Vellamcheti, Shanmukha and Aakur, Sathyanarayanan N.},
  title     = {{ProbRes}: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14128--14140},
}
Information Density Principle for MLLM Benchmarks: Chunyi Li,

Xiaozhe Li,

Zicheng Zhang,

Yuan Tian,

Ziheng Jia,

Xiaohong Liu,

Xiongkuo Min,

Jia Wang,

Haodong Duan,

Kai Chen,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Chunyi and Li, Xiaozhe and Zhang, Zicheng and Tian, Yuan and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Wang, Jia and Duan, Haodong and Chen, Kai and Zhai, Guangtao},
  title     = {Information Density Principle for {MLLM} Benchmarks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4167--4177},
}
CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts: Olaf Dünkel,

Artur Jesslen,

Jiahao Xie,

Christian Theobalt,

Christian Rupprecht,

Adam Kortylewski; [pdf] [supp]
[bibtex]
@InProceedings{Dunkel_2025_ICCV,
  author    = {D{\"u}nkel, Olaf and Jesslen, Artur and Xie, Jiahao and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam},
  title     = {{CNS-Bench}: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19978--19988},
}
Knowledge-Guided Part Segmentation: Xuejian Gou,

Fang Liu,

Licheng Jiao,

Shuo Li,

Lingling Li,

Hao Wang,

Xu Liu,

Puhua Chen,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Gou_2025_ICCV,
  author    = {Gou, Xuejian and Liu, Fang and Jiao, Licheng and Li, Shuo and Li, Lingling and Wang, Hao and Liu, Xu and Chen, Puhua and Ma, Wenping},
  title     = {Knowledge-Guided Part Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5490--5500},
}
CRAM: Large Scale Video Continual Learning with Bootstrapped Compression: Shivani Mall,

Joao F. Henriques; [pdf] [arXiv]
[bibtex]
@InProceedings{Mall_2025_ICCV,
  author    = {Mall, Shivani and Henriques, Joao F.},
  title     = {{CRAM}: Large Scale Video Continual Learning with Bootstrapped Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15045--15055},
}
DeSPITE: Exploring Contrastive Deep Skeleton-Pointcloud-IMU-Text Embeddings for Advanced Point Cloud Human Activity Understanding: Thomas Kreutz,

Max Mühlhäuser,

Alejandro Sanchez Guinea; [pdf] [supp]
[bibtex]
@InProceedings{Kreutz_2025_ICCV,
  author    = {Kreutz, Thomas and M{\"u}hlh{\"a}user, Max and Sanchez Guinea, Alejandro},
  title     = {{DeSPITE}: Exploring Contrastive Deep Skeleton-Pointcloud-{IMU}-Text Embeddings for Advanced Point Cloud Human Activity Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14633--14643},
}
Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation: Xueqing Deng,

Linjie Yang,

Qihang Yu,

Chenglin Yang,

Liang-Chieh Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Xueqing and Yang, Linjie and Yu, Qihang and Yang, Chenglin and Chen, Liang-Chieh},
  title     = {Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15107--15116},
}
Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising: Xiangbin Wei,

Yuanfeng Wang,

Ao Xu,

Lingyu Zhu,

Dongyong Sun,

Keren Li,

Yang Li,

Qi Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiangbin and Wang, Yuanfeng and Xu, Ao and Zhu, Lingyu and Sun, Dongyong and Li, Keren and Li, Yang and Qin, Qi},
  title     = {{Noise2Score3D}: {Tweedie's} Approach for Unsupervised Point Cloud Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25993--26003},
}
I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting: Zhimin Liao,

Ping Wei,

Ruijie Zhang,

Shuaijia Chen,

Haoxuan Wang,

Ziyang Ren; [pdf]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zhimin and Wei, Ping and Zhang, Ruijie and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang},
  title     = {{I2-World}: Intra-Inter Tokenization for Efficient Dynamic {4D} Scene Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25810--25819},
}
Meta-Unlearning on Diffusion Models: Preventing Relearning Unlearned Concepts: Hongcheng Gao,

Tianyu Pang,

Chao Du,

Taihang Hu,

Zhijie Deng,

Min Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Hongcheng and Pang, Tianyu and Du, Chao and Hu, Taihang and Deng, Zhijie and Lin, Min},
  title     = {Meta-Unlearning on Diffusion Models: Preventing Relearning Unlearned Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2131--2141},
}
Auto-Vocabulary Semantic Segmentation: Osman Ülger,

Maksymilian Kulicki,

Yuki Asano,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Ulger_2025_ICCV,
  author    = {{\"U}lger, Osman and Kulicki, Maksymilian and Asano, Yuki and Oswald, Martin R.},
  title     = {Auto-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24266--24275},
}
SpinMeRound: Consistent Multi-View Identity Generation Using Diffusion Models: Stathis Galanakis,

Alexandros Lattas,

Stylianos Moschoglou,

Bernhard Kainz,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galanakis_2025_ICCV,
  author    = {Galanakis, Stathis and Lattas, Alexandros and Moschoglou, Stylianos and Kainz, Bernhard and Zafeiriou, Stefanos},
  title     = {{SpinMeRound}: Consistent Multi-View Identity Generation Using Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14346--14356},
}
MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition: Yangfu Li,

Hongjian Zhan,

Qi Liu,

Li Sun,

Yu-Jie Xiong,

Yue Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yangfu and Zhan, Hongjian and Liu, Qi and Sun, Li and Xiong, Yu-Jie and Lu, Yue},
  title     = {{MSA2}: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set {Chinese} Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23095--23104},
}
IGL-Nav: Incremental 3D Gaussian Localization for Image-goal Navigation: Wenxuan Guo,

Xiuwei Xu,

Hang Yin,

Ziwei Wang,

Jianjiang Feng,

Jie Zhou,

Jiwen Lu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Yin, Hang and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {{IGL-Nav}: Incremental {3D} {Gaussian} Localization for Image-goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6808--6817},
}
PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations: Yu Wei,

Jiahui Zhang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yu and Zhang, Jiahui and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{PCR-GS}: {COLMAP}-Free {3D} {Gaussian} Splatting via Pose Co-Regularizations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26499--26508},
}
EA-KD: Entropy-based Adaptive Knowledge Distillation: Chi-Ping Su,

Ching-Hsun Tseng,

Bin Pu,

Lei Zhao,

Jiewen Yang,

Zhuangzhuang Chen,

Shin-Jye Lee; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Chi-Ping and Tseng, Ching-Hsun and Pu, Bin and Zhao, Lei and Yang, Jiewen and Chen, Zhuangzhuang and Lee, Shin-Jye},
  title     = {{EA-KD}: Entropy-based Adaptive Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {731--740},
}
D3: Training-Free AI-Generated Video Detection Using Second-Order Features: Chende Zheng,

Ruiqi Suo,

Chenhao Lin,

Zhengyu Zhao,

Le Yang,

Shuai Liu,

Minghui Yang,

Cong Wang,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Chende and Suo, Ruiqi and Lin, Chenhao and Zhao, Zhengyu and Yang, Le and Liu, Shuai and Yang, Minghui and Wang, Cong and Shen, Chao},
  title     = {{D3}: Training-Free {AI}-Generated Video Detection Using Second-Order Features},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12852--12862},
}
Background Invariance Testing According to Semantic Proximity: Zukang Liao,

Min Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zukang and Chen, Min},
  title     = {Background Invariance Testing According to Semantic Proximity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8056--8065},
}
Resolving Token-Space Gradient Conflicts: Token Space Manipulation for Transformer-Based Multi-Task Learning: Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Wooseong and Yoon, Kuk-Jin},
  title     = {Resolving Token-Space Gradient Conflicts: Token Space Manipulation for Transformer-Based Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2887--2897},
}
ViT-Split: Unleashing the Power of Vision Foundation Models via Efficient Splitting Heads: Yifan Li,

Xin Li,

Tianqin Li,

Wenbin He,

Yu Kong,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yifan and Li, Xin and Li, Tianqin and He, Wenbin and Kong, Yu and Ren, Liu},
  title     = {{ViT-Split}: Unleashing the Power of Vision Foundation Models via Efficient Splitting Heads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1979--1989},
}
Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues: Chen Chen,

Kangcheng Bin,

Ting Hu,

Jiahao Qi,

Xingyue Liu,

Tianpeng Liu,

Zhen Liu,

Yongxiang Liu,

Ping Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chen and Bin, Kangcheng and Hu, Ting and Qi, Jiahao and Liu, Xingyue and Liu, Tianpeng and Liu, Zhen and Liu, Yongxiang and Zhong, Ping},
  title     = {Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for {UAV}-based Multimodal Object Detection with Condition Cues},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27958--27967},
}
Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment: Qingqian Yang,

Peishen Yan,

Xiaoyu Wu,

Jiaru Zhang,

Tao Song,

Yang Hua,

Hao Wang,

Liangliang Wang,

Haibing Guan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Qingqian and Yan, Peishen and Wu, Xiaoyu and Zhang, Jiaru and Song, Tao and Hua, Yang and Wang, Hao and Wang, Liangliang and Guan, Haibing},
  title     = {Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29163--29172},
}
Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics: Keming Wu,

Junwen Chen,

Zhanhao Liang,

Yinuo Wang,

Ji Li,

Chao Zhang,

Bin Wang,

Yuhui Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Keming and Chen, Junwen and Liang, Zhanhao and Wang, Yinuo and Li, Ji and Zhang, Chao and Wang, Bin and Yuan, Yuhui},
  title     = {Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17930--17940},
}
BUFFER-X: Towards Zero-Shot Point Cloud Registration in Diverse Scenes: Minkyun Seo,

Hyungtae Lim,

Kanghee Lee,

Luca Carlone,

Jaesik Park; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2025_ICCV,
  author    = {Seo, Minkyun and Lim, Hyungtae and Lee, Kanghee and Carlone, Luca and Park, Jaesik},
  title     = {{BUFFER-X}: Towards Zero-Shot Point Cloud Registration in Diverse Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3851--3862},
}
VITAL: More Understandable Feature Visualization through Distribution Alignment and Relevant Information Flow: Ada Görgün,

Bernt Schiele,

Jonas Fischer; [pdf] [supp]
[bibtex]
@InProceedings{Gorgun_2025_ICCV,
  author    = {G{\"o}rg{\"u}n, Ada and Schiele, Bernt and Fischer, Jonas},
  title     = {{VITAL}: More Understandable Feature Visualization through Distribution Alignment and Relevant Information Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4403--4412},
}
DisCoPatch: Taming Adversarially-driven Batch Statistics for Improved Out-of-Distribution Detection: Francisco Caetano,

Christiaan Viviers,

Luis A. Zavala-Mondragón,

Peter H.N. De With,

Fons van der Sommen; [pdf] [supp]
[bibtex]
@InProceedings{Caetano_2025_ICCV,
  author    = {Caetano, Francisco and Viviers, Christiaan and Zavala-Mondrag{\'o}n, Luis A. and De With, Peter H. N. and van der Sommen, Fons},
  title     = {{DisCoPatch}: Taming Adversarially-driven Batch Statistics for Improved Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2898--2908},
}
EC-Flow: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow: Yixiang Chen,

Peiyan Li,

Yan Huang,

Jiabing Yang,

Kehan Chen,

Liang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yixiang and Li, Peiyan and Huang, Yan and Yang, Jiabing and Chen, Kehan and Wang, Liang},
  title     = {{EC-Flow}: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11958--11968},
}
One Look is Enough: Seamless Patchwise Refinement for Zero-Shot Monocular Depth Estimation on High-Resolution Images: Byeongjun Kwon,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_ICCV,
  author    = {Kwon, Byeongjun and Kim, Munchurl},
  title     = {One Look is Enough: Seamless Patchwise Refinement for Zero-Shot Monocular Depth Estimation on High-Resolution Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8077--8087},
}
Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis: Xinyu Hou,

Zongsheng Yue,

Xiaoming Li,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Xinyu and Yue, Zongsheng and Li, Xiaoming and Loy, Chen Change},
  title     = {Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19353--19362},
}
Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition: Wenhan Wu,

Zhishuai Guo,

Chen Chen,

Hongfei Xue,

Aidong Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Wenhan and Guo, Zhishuai and Chen, Chen and Xue, Hongfei and Lu, Aidong},
  title     = {Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11122--11131},
}
CleanPose: Category-Level Object Pose Estimation via Causal Learning and Knowledge Distillation: Xiao Lin,

Yun Peng,

Liuyi Wang,

Xianyou Zhong,

Minghao Zhu,

Yi Feng,

Jingwei Yang,

Chengju Liu,

Qijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Xiao and Peng, Yun and Wang, Liuyi and Zhong, Xianyou and Zhu, Minghao and Feng, Yi and Yang, Jingwei and Liu, Chengju and Chen, Qijun},
  title     = {{CleanPose}: Category-Level Object Pose Estimation via Causal Learning and Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5990--6000},
}
Lark: Low-Rank Updates After Knowledge Localization for Few-shot Class-Incremental Learning: Jinxin Shi,

Jiabao Zhao,

Yifan Yang,

Xingjiao Wu,

Jiawen Li,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Jinxin and Zhao, Jiabao and Yang, Yifan and Wu, Xingjiao and Li, Jiawen and He, Liang},
  title     = {Lark: Low-Rank Updates After Knowledge Localization for Few-shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3607--3617},
}
Articulate3D: Holistic Understanding of 3D Scenes as Universal Scene Description: Anna-Maria Halacheva,

Yang Miao,

Jan-Nico Zaech,

Xi Wang,

Luc Van Gool,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Halacheva_2025_ICCV,
  author    = {Halacheva, Anna-Maria and Miao, Yang and Zaech, Jan-Nico and Wang, Xi and Van Gool, Luc and Paudel, Danda Pani},
  title     = {{Articulate3D}: Holistic Understanding of {3D} Scenes as {Universal Scene Description}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5633--5644},
}
DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model: Tianyu Zhang,

Haobo Jiang,

Jian Yang,

Jin Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tianyu and Jiang, Haobo and Yang, Jian and Xie, Jin},
  title     = {{DiffPCI}: Large Motion Point Cloud frame Interpolation with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27348--27358},
}
TryOn-Refiner: Conditional Rectified-flow-based TryOn Refiner for More Accurate Detail Reconstruction: Wen Qian; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Wen},
  title     = {{TryOn-Refiner}: Conditional Rectified-flow-based {TryOn} Refiner for More Accurate Detail Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15669--15679},
}
AM-Adapter: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild: Siyoon Jin,

Jisu Nam,

Jiyoung Kim,

Dahyun Chung,

Yeong-Seok Kim,

Joonhyung Park,

Heonjeong Chu,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Siyoon and Nam, Jisu and Kim, Jiyoung and Chung, Dahyun and Kim, Yeong-Seok and Park, Joonhyung and Chu, Heonjeong and Kim, Seungryong},
  title     = {{AM-Adapter}: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17077--17086},
}
Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens: Runpeng Yu,

Xinyin Ma,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Runpeng and Ma, Xinyin and Wang, Xinchao},
  title     = {Auto-Controlled Image Perception in {MLLMs} via Visual Perception Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21822--21831},
}
Addressing Representation Collapse in Vector Quantized Models with One Linear Layer: Yongxin Zhu,

Bocheng Li,

Yifei Xin,

Zhihua Xia,

Linli Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yongxin and Li, Bocheng and Xin, Yifei and Xia, Zhihua and Xu, Linli},
  title     = {Addressing Representation Collapse in Vector Quantized Models with One Linear Layer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22968--22977},
}
MosaicDiff: Training-free Structural Pruning for Diffusion Model Acceleration Reflecting Pretraining Dynamics: Bowei Guo,

Shengkun Tang,

Cong Zeng,

Zhiqiang Shen; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Bowei and Tang, Shengkun and Zeng, Cong and Shen, Zhiqiang},
  title     = {{MosaicDiff}: Training-free Structural Pruning for Diffusion Model Acceleration Reflecting Pretraining Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1655--1664},
}
Not Only Vision: Evolve Visual Speech Recognition via Peripheral Information: Zhaoxin Yuan,

Shuang Yang,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhaoxin and Yang, Shuang and Shan, Shiguang and Chen, Xilin},
  title     = {Not Only Vision: Evolve Visual Speech Recognition via Peripheral Information},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3091--3100},
}
Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation: Rongpei Hong,

Jian Lang,

Ting Zhong,

Fan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Rongpei and Lang, Jian and Zhong, Ting and Zhou, Fan},
  title     = {Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22728--22737},
}
Trans-Adapter: A Plug-and-Play Framework for Transparent Image Inpainting: Yuekun Dai,

Haitian Li,

Shangchen Zhou,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Yuekun and Li, Haitian and Zhou, Shangchen and Loy, Chen Change},
  title     = {{Trans-Adapter}: A Plug-and-Play Framework for Transparent Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15015--15024},
}
MetaMorph: Multimodal Understanding and Generation via Instruction Tuning: Shengbang Tong,

David Fan,

Jiachen Li,

Yunyang Xiong,

Xinlei Chen,

Koustuv Sinha,

Michael Rabbat,

Yann LeCun,

Saining Xie,

Zhuang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Shengbang and Fan, David and Li, Jiachen and Xiong, Yunyang and Chen, Xinlei and Sinha, Koustuv and Rabbat, Michael and LeCun, Yann and Xie, Saining and Liu, Zhuang},
  title     = {{MetaMorph}: Multimodal Understanding and Generation via Instruction Tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17001--17012},
}
CHARM3R: Towards Unseen Camera Height Robust Monocular 3D Detector: Abhinav Kumar,

Yuliang Guo,

Zhihao Zhang,

Xinyu Huang,

Liu Ren,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Abhinav and Guo, Yuliang and Zhang, Zhihao and Huang, Xinyu and Ren, Liu and Liu, Xiaoming},
  title     = {{CHARM3R}: Towards Unseen Camera Height Robust Monocular {3D} Detector},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8777--8788},
}
SuperDec: 3D Scene Decomposition with Superquadrics Primitives: Elisabetta Fedele,

Boyang Sun,

Leonidas Guibas,

Marc Pollefeys,

Francis Engelmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fedele_2025_ICCV,
  author    = {Fedele, Elisabetta and Sun, Boyang and Guibas, Leonidas and Pollefeys, Marc and Engelmann, Francis},
  title     = {{SuperDec}: {3D} Scene Decomposition with Superquadrics Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24625--24635},
}
Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization: Gen Li,

Yang Xiao,

Jie Ji,

Kaiyuan Deng,

Bo Hui,

Linke Guo,

Xiaolong Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Xiao, Yang and Ji, Jie and Deng, Kaiyuan and Hui, Bo and Guo, Linke and Ma, Xiaolong},
  title     = {Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19659--19668},
}
LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering: Xiaohang Zhan,

Dingming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Xiaohang and Liu, Dingming},
  title     = {{LaRender}: Training-Free Occlusion Control in Image Generation via Latent Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19679--19688},
}
TimeFormer: Capturing Temporal Relationships of Deformable 3D Gaussians for Robust Reconstruction: Dadong Jiang,

Zhi Hou,

Zhihui Ke,

Xianghui Yang,

Xiaobo Zhou,

Tie Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Dadong and Hou, Zhi and Ke, Zhihui and Yang, Xianghui and Zhou, Xiaobo and Qiu, Tie},
  title     = {{TimeFormer}: Capturing Temporal Relationships of Deformable {3D} {Gaussians} for Robust Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8721--8732},
}
LLM Thought Divergence and Convergence for Dialogue-Based Image Generation Control: Hui Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hui},
  title     = {{LLM} Thought Divergence and Convergence for Dialogue-Based Image Generation Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18101--18110},
}
LATINO-PRO: LAtent consisTency INverse sOlver with PRompt Optimization: Alessio Spagnoletti,

Jean Prost,

Andrés Almansa,

Nicolas Papadakis,

Marcelo Pereyra; [pdf] [supp]
[bibtex]
@InProceedings{Spagnoletti_2025_ICCV,
  author    = {Spagnoletti, Alessio and Prost, Jean and Almansa, Andr{\'e}s and Papadakis, Nicolas and Pereyra, Marcelo},
  title     = {{LATINO-PRO}: {LAtent} {consisTency} {INverse} {sOlver} with {PRompt} Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19597--19607},
}
Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables: Wontae Kim,

Keuntek Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Wontae and Lee, Keuntek and Cho, Nam Ik},
  title     = {Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11895--11905},
}
Geminio: Language-Guided Gradient Inversion Attacks in Federated Learning: Junjie Shan,

Ziqi Zhao,

Jialin Lu,

Rui Zhang,

Siu Ming Yiu,

Ka-Ho Chow; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2025_ICCV,
  author    = {Shan, Junjie and Zhao, Ziqi and Lu, Jialin and Zhang, Rui and Yiu, Siu Ming and Chow, Ka-Ho},
  title     = {Geminio: Language-Guided Gradient Inversion Attacks in Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2718--2727},
}
DONUT: A Decoder-Only Model for Trajectory Prediction: Markus Knoche,

Daan de Geus,

Bastian Leibe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Knoche_2025_ICCV,
  author    = {Knoche, Markus and de Geus, Daan and Leibe, Bastian},
  title     = {{DONUT}: A Decoder-Only Model for Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28903--28912},
}
Training-free Generation of Temporally Consistent Rewards from VLMs: Yinuo Zhao,

Jiale Yuan,

Zhiyuan Xu,

Xiaoshuai Hao,

Xinyi Zhang,

Kun Wu,

Zhengping Che,

Chi Harold Liu,

Jian Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yinuo and Yuan, Jiale and Xu, Zhiyuan and Hao, Xiaoshuai and Zhang, Xinyi and Wu, Kun and Che, Zhengping and Liu, Chi Harold and Tang, Jian},
  title     = {Training-free Generation of Temporally Consistent Rewards from {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8133--8143},
}
Compression-Aware One-Step Diffusion Model for JPEG Artifact Removal: Jinpei Guo,

Zheng Chen,

Wenbo Li,

Yong Guo,

Yulun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jinpei and Chen, Zheng and Li, Wenbo and Guo, Yong and Zhang, Yulun},
  title     = {Compression-Aware One-Step Diffusion Model for {JPEG} Artifact Removal},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14930--14939},
}
AnyPortal: Zero-Shot Consistent Video Background Replacement: Wenshuo Gao,

Xicheng Lan,

Shuai Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Wenshuo and Lan, Xicheng and Yang, Shuai},
  title     = {{AnyPortal}: Zero-Shot Consistent Video Background Replacement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18990--18999},
}
Scene Coordinate Reconstruction Priors: Wenjing Bian,

Axel Barroso-Laguna,

Tommaso Cavallari,

Victor Adrian Prisacariu,

Eric Brachmann; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Wenjing and Barroso-Laguna, Axel and Cavallari, Tommaso and Prisacariu, Victor Adrian and Brachmann, Eric},
  title     = {Scene Coordinate Reconstruction Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25765--25776},
}
Towards a Universal Image Degradation Model via Content-Degradation Disentanglement: Wenbo Yang,

Zhongling Wang,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wenbo and Wang, Zhongling and Wang, Zhou},
  title     = {Towards a Universal Image Degradation Model via Content-Degradation Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12966--12975},
}
Balanced Image Stylization with Style Matching Score: Yuxin Jiang,

Liming Jiang,

Shuai Yang,

Jia-Wei Liu,

Ivor W. Tsang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yuxin and Jiang, Liming and Yang, Shuai and Liu, Jia-Wei and Tsang, Ivor W. and Shou, Mike Zheng},
  title     = {Balanced Image Stylization with Style Matching Score},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17346--17355},
}
MultiModal Action Conditioned Video Simulation: Yichen Li,

Antonio Torralba; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yichen and Torralba, Antonio},
  title     = {MultiModal Action Conditioned Video Simulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14173--14183},
}
Breaking Rectangular Shackles: Cross-View Object Segmentation for Fine-Grained Object Geo-Localization: Qingwang Zhang,

Yingying Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qingwang and Zhu, Yingying},
  title     = {Breaking Rectangular Shackles: Cross-View Object Segmentation for Fine-Grained Object Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8197--8206},
}
Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation: Ziliang Miao,

Runjian Chen,

Yixi Cai,

Buwei He,

Wenquan Zhao,

Wenqi Shao,

Bo Zhang,

Fu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Ziliang and Chen, Runjian and Cai, Yixi and He, Buwei and Zhao, Wenquan and Shao, Wenqi and Zhang, Bo and Zhang, Fu},
  title     = {Temporal Overlapping Prediction: A Self-supervised Pre-training Method for {LiDAR} Moving Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26653--26663},
}
No More Sibling Rivalry: Debiasing Human-Object Interaction Detection: Bin Yang,

Yulin Zhang,

Hong-Yu Zhou,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Bin and Zhang, Yulin and Zhou, Hong-Yu and Yang, Sibei},
  title     = {No More Sibling Rivalry: Debiasing Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22707--22717},
}
Imbalance in Balance: Online Concept Balancing in Generation Models: Yukai Shi,

Jiarong Ou,

Rui Chen,

Haotian Yang,

Jiahao Wang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yukai and Ou, Jiarong and Chen, Rui and Yang, Haotian and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Gai, Kun},
  title     = {Imbalance in Balance: Online Concept Balancing in Generation Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17432--17442},
}
Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order Levenberg-Marquardt-Langevin: Fangyikang Wang,

Hubery Yin,

Lei Qian,

Yinan Li,

Shaobin Zhuang,

Huminhao Zhu,

Yilin Zhang,

Yanlong Tang,

Chao Zhang,

Hanbin Zhao,

Hui Qian,

Chen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Fangyikang and Yin, Hubery and Qian, Lei and Li, Yinan and Zhuang, Shaobin and Zhu, Huminhao and Zhang, Yilin and Tang, Yanlong and Zhang, Chao and Zhao, Hanbin and Qian, Hui and Li, Chen},
  title     = {Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order {Levenberg-Marquardt-Langevin}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10453--10464},
}
Temperature in Cosine-based Softmax Loss: Takumi Kobayashi; [pdf] [supp]
[bibtex]
@InProceedings{Kobayashi_2025_ICCV,
  author    = {Kobayashi, Takumi},
  title     = {Temperature in Cosine-based Softmax Loss},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22199--22208},
}
SD2Actor: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation: Jiayi Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiayi},
  title     = {{SD2Actor}: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13751--13760},
}
Free-Form Motion Control: Controlling the 6D Poses of Camera and Objects in Video Generation: Xincheng Shuai,

Henghui Ding,

Zhenyuan Qin,

Hao Luo,

Xingjun Ma,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2025_ICCV,
  author    = {Shuai, Xincheng and Ding, Henghui and Qin, Zhenyuan and Luo, Hao and Ma, Xingjun and Tao, Dacheng},
  title     = {Free-Form Motion Control: Controlling the {6D} Poses of Camera and Objects in Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12449--12458},
}
GaussianProperty: Integrating Physical Properties to 3D Gaussians with LMMs: Xinli Xu,

Wenhang Ge,

Dicong Qiu,

ZhiFei Chen,

Dongyu Yan,

Zhuoyun Liu,

Haoyu Zhao,

Hanfeng Zhao,

Shunsi Zhang,

Junwei Liang,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xinli and Ge, Wenhang and Qiu, Dicong and Chen, ZhiFei and Yan, Dongyu and Liu, Zhuoyun and Zhao, Haoyu and Zhao, Hanfeng and Zhang, Shunsi and Liang, Junwei and Chen, Ying-Cong},
  title     = {{GaussianProperty}: Integrating Physical Properties to {3D} {Gaussians} with {LMMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7231--7240},
}
Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation: I-Hsiang Chen,

Hua-En Chang,

Wei-Ting Chen,

Jenq-Neng Hwang,

Sy-Yen Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, I-Hsiang and Chang, Hua-En and Chen, Wei-Ting and Hwang, Jenq-Neng and Kuo, Sy-Yen},
  title     = {Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21755--21765},
}
Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts: Yun Wang,

Longguang Wang,

Chenghao Zhang,

Yongjian Zhang,

Zhanjie Zhang,

Ao Ma,

Chenyou Fan,

Tin Lun Lam,

Junjie Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yun and Wang, Longguang and Zhang, Chenghao and Zhang, Yongjian and Zhang, Zhanjie and Ma, Ao and Fan, Chenyou and Lam, Tin Lun and Hu, Junjie},
  title     = {Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21276--21287},
}
Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal: Yitong Jiang,

Jinwei Gu,

Tianfan Xue,

Ka Chun Cheung,

Pavlo Molchanov,

Hongxu Yin,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yitong and Gu, Jinwei and Xue, Tianfan and Cheung, Ka Chun and Molchanov, Pavlo and Yin, Hongxu and Liu, Sifei},
  title     = {Token-Efficient {VLM}: High-Resolution Image Understanding via Dynamic Region Proposal},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24147--24158},
}
Controlling Multimodal LLMs via Reward-guided Decoding: Oscar Mañas,

Pierluca D'Oro,

Koustuv Sinha,

Adriana Romero-Soriano,

Michal Drozdzal,

Aishwarya Agrawal; [pdf] [supp]
[bibtex]
@InProceedings{Manas_2025_ICCV,
  author    = {Ma{\~n}as, Oscar and D'Oro, Pierluca and Sinha, Koustuv and Romero-Soriano, Adriana and Drozdzal, Michal and Agrawal, Aishwarya},
  title     = {Controlling Multimodal {LLMs} via Reward-guided Decoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1391--1401},
}
Multi-Modal Few-Shot Temporal Action Segmentation: Zijia Lu,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Zijia and Elhamifar, Ehsan},
  title     = {Multi-Modal Few-Shot Temporal Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14106--14116},
}
What If: Understanding Motion Through Sparse Interactions: Stefan Andreas Baumann,

Nick Stracke,

Timy Phan,

Björn Ommer; [pdf] [supp]
[bibtex]
@InProceedings{Baumann_2025_ICCV,
  author    = {Baumann, Stefan Andreas and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn},
  title     = {What If: Understanding Motion Through Sparse Interactions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10286--10296},
}
Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs: Bhavya Goyal,

Felipe Gutierrez-Barragan,

Wei Lin,

Andreas Velten,

Yin Li,

Mohit Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goyal_2025_ICCV,
  author    = {Goyal, Bhavya and Gutierrez-Barragan, Felipe and Lin, Wei and Velten, Andreas and Li, Yin and Gupta, Mohit},
  title     = {Robust {3D} Object Detection using Probabilistic Point Clouds from Single-Photon {LiDARs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28417--28427},
}
SAMPLE: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition: Jing Wang,

Rui Zhao,

Ruiqin Xiong,

Xingtao Wang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jing and Zhao, Rui and Xiong, Ruiqin and Wang, Xingtao and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{SAMPLE}: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14409--14419},
}
Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with Fourier Integrity: Sung Ju Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Sung Ju and Cho, Nam Ik},
  title     = {Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with {Fourier} Integrity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18759--18769},
}
Auto-Regressively Generating Multi-View Consistent Images: JiaKui Hu,

Yuxiao Yang,

Jialun Liu,

Jinbo Wu,

Chen Zhao,

Yanye Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, JiaKui and Yang, Yuxiao and Liu, Jialun and Wu, Jinbo and Zhao, Chen and Lu, Yanye},
  title     = {Auto-Regressively Generating Multi-View Consistent Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2556--2566},
}
TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding: Zuhao Yang,

Yingchen Yu,

Yunqing Zhao,

Shijian Lu,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zuhao and Yu, Yingchen and Zhao, Yunqing and Lu, Shijian and Bai, Song},
  title     = {{TimeExpert}: An Expert-Guided Video {LLM} for Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24286--24296},
}
AIM: Amending Inherent Interpretability via Self-Supervised Masking: Eyad Alshami,

Shashank Agnihotri,

Bernt Schiele,

Margret Keuper; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alshami_2025_ICCV,
  author    = {Alshami, Eyad and Agnihotri, Shashank and Schiele, Bernt and Keuper, Margret},
  title     = {{AIM}: Amending Inherent Interpretability via Self-Supervised Masking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {993--1003},
}
I Am Big, You Are Little; I Am Right, You Are Wrong: David A. Kelly,

Akchunya Chanchal,

Nathan Blake; [pdf] [arXiv]
[bibtex]
@InProceedings{Kelly_2025_ICCV,
  author    = {Kelly, David A. and Chanchal, Akchunya and Blake, Nathan},
  title     = {{I} Am Big, You Are Little; {I} Am Right, You Are Wrong},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {817--826},
}
Jigsaw++: Imagining Complete Shape Priors for Object Reassembly: Jiaxin Lu,

Gang Hua,

Qixing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiaxin and Hua, Gang and Huang, Qixing},
  title     = {Jigsaw++: Imagining Complete Shape Priors for Object Reassembly},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6704--6714},
}
GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting: Baijun Ye,

Minghui Qin,

Saining Zhang,

Moonjun Gong,

Shaoting Zhu,

Hao Zhao,

Hang Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Baijun and Qin, Minghui and Zhang, Saining and Gong, Moonjun and Zhu, Shaoting and Zhao, Hao and Zhao, Hang},
  title     = {{GS-Occ3D}: Scaling Vision-only Occupancy Reconstruction with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25925--25937},
}
CoMatch: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching: Zizhuo Li,

Yifan Lu,

Linfeng Tang,

Shihua Zhang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zizhuo and Lu, Yifan and Tang, Linfeng and Zhang, Shihua and Ma, Jiayi},
  title     = {{CoMatch}: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18521--18530},
}
A Unified Interpretation of Training-Time Out-of-Distribution Detection: Xu Cheng,

Xin Jiang,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Xu and Jiang, Xin and Li, Zechao},
  title     = {A Unified Interpretation of Training-Time Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2142--2151},
}
Memory-Efficient Generative Models via Product Quantization: Jie Shao,

Hanxiao Zhang,

Hao Yu,

Jianxin Wu; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Jie and Zhang, Hanxiao and Yu, Hao and Wu, Jianxin},
  title     = {Memory-Efficient Generative Models via Product Quantization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16871--16881},
}
Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings: Haoyu Yao,

Bin Yang,

Wenke Huang,

Bo Du,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Haoyu and Yang, Bin and Huang, Wenke and Du, Bo and Ye, Mang},
  title     = {Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11916--11926},
}
Soft Separation and Distillation: Toward Global Uniformity in Federated Unsupervised Learning: Hung-Chieh Fang,

Hsuan-Tien Lin,

Irwin King,

Yifei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Hung-Chieh and Lin, Hsuan-Tien and King, Irwin and Zhang, Yifei},
  title     = {Soft Separation and Distillation: Toward Global Uniformity in Federated Unsupervised Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2971--2980},
}
TaxaDiffusion: Progressively Trained Diffusion Model for Fine-Grained Species Generation: Amin Karimi Monsefi,

Mridul Khurana,

Rajiv Ramnath,

Anuj Karpatne,

Wei-Lun Chao,

Cheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Monsefi_2025_ICCV,
  author    = {Monsefi, Amin Karimi and Khurana, Mridul and Ramnath, Rajiv and Karpatne, Anuj and Chao, Wei-Lun and Zhang, Cheng},
  title     = {{TaxaDiffusion}: Progressively Trained Diffusion Model for Fine-Grained Species Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8579--8589},
}
Multimodal LLMs as Customized Reward Models for Text-to-Image Generation: Shijie Zhou,

Ruiyi Zhang,

Huaisheng Zhu,

Branislav Kveton,

Yufan Zhou,

Jiuxiang Gu,

Jian Chen,

Changyou Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Shijie and Zhang, Ruiyi and Zhu, Huaisheng and Kveton, Branislav and Zhou, Yufan and Gu, Jiuxiang and Chen, Jian and Chen, Changyou},
  title     = {Multimodal {LLMs} as Customized Reward Models for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19638--19648},
}
MEMFOF: High-Resolution Training for Memory-Efficient Multi-Frame Optical Flow Estimation: Vladislav Bargatin,

Egor Chistov,

Alexander Yakovenko,

Dmitriy Vatolin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bargatin_2025_ICCV,
  author    = {Bargatin, Vladislav and Chistov, Egor and Yakovenko, Alexander and Vatolin, Dmitriy},
  title     = {{MEMFOF}: High-Resolution Training for Memory-Efficient Multi-Frame Optical Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8187--8196},
}
Diving into the Fusion of Monocular Priors for Generalized Stereo Matching: Chengtang Yao,

Lidong Yu,

Zhidan Liu,

Jiaxi Zeng,

Yuwei Wu,

Yunde Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Chengtang and Yu, Lidong and Liu, Zhidan and Zeng, Jiaxi and Wu, Yuwei and Jia, Yunde},
  title     = {Diving into the Fusion of Monocular Priors for Generalized Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14887--14897},
}
RAGNet: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping: Dongming Wu,

Yanping Fu,

Saike Huang,

Yingfei Liu,

Fan Jia,

Nian Liu,

Feng Dai,

Tiancai Wang,

Rao Muhammad Anwer,

Fahad Shahbaz Khan,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Dongming and Fu, Yanping and Huang, Saike and Liu, Yingfei and Jia, Fan and Liu, Nian and Dai, Feng and Wang, Tiancai and Anwer, Rao Muhammad and Khan, Fahad Shahbaz and Shen, Jianbing},
  title     = {{RAGNet}: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11980--11990},
}
R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception: Jonas Mirlach,

Lei Wan,

Andreas Wiedholz,

Hannan Ejaz Keen,

Andreas Eich; [pdf]
[bibtex]
@InProceedings{Mirlach_2025_ICCV,
  author    = {Mirlach, Jonas and Wan, Lei and Wiedholz, Andreas and Keen, Hannan Ejaz and Eich, Andreas},
  title     = {{R-LiViT}: A {LiDAR}-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28375--28384},
}
Spatially-Varying Autofocus: Yingsi Qin,

Aswin C. Sankaranarayanan,

Matthew O'Toole; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yingsi and Sankaranarayanan, Aswin C. and O'Toole, Matthew},
  title     = {Spatially-Varying Autofocus},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24645--24654},
}
ZeroStereo: Zero-shot Stereo Matching from Single Images: Xianqi Wang,

Hao Yang,

Gangwei Xu,

Junda Cheng,

Min Lin,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xianqi and Yang, Hao and Xu, Gangwei and Cheng, Junda and Lin, Min and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin},
  title     = {{ZeroStereo}: Zero-shot Stereo Matching from Single Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28177--28187},
}
A3GS: Arbitrary Artistic Style into Arbitrary 3D Gaussian Splatting: Zhiyuan Fang,

Rengan Xie,

Xuancheng Jin,

Qi Ye,

Wei Chen,

Wenting Zheng,

Rui Wang,

Yuchi Huo; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Zhiyuan and Xie, Rengan and Jin, Xuancheng and Ye, Qi and Chen, Wei and Zheng, Wenting and Wang, Rui and Huo, Yuchi},
  title     = {{A3GS}: Arbitrary Artistic Style into Arbitrary {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17751--17760},
}
SEREP: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting: Arthur Josi,

Luiz Gustavo Hafemann,

Abdallah Dib,

Emeline Got,

Rafael M. O. Cruz,

Marc-André Carbonneau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Josi_2025_ICCV,
  author    = {Josi, Arthur and Hafemann, Luiz Gustavo and Dib, Abdallah and Got, Emeline and Cruz, Rafael M. O. and Carbonneau, Marc-Andr{\'e}},
  title     = {{SEREP}: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14538--14548},
}
Visual Modality Prompt for Adapting Vision-Language Object Detectors: Heitor R. Medeiros,

Atif Belal,

Srikanth Muralidharan,

Eric Granger,

Marco Pedersoli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Medeiros_2025_ICCV,
  author    = {Medeiros, Heitor R. and Belal, Atif and Muralidharan, Srikanth and Granger, Eric and Pedersoli, Marco},
  title     = {Visual Modality Prompt for Adapting Vision-Language Object Detectors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2172--2182},
}
The Silent Assistant: NoiseQuery as Implicit Guidance for Goal-Driven Image Generation: Ruoyu Wang,

Huayang Huang,

Ye Zhu,

Olga Russakovsky,

Yu Wu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruoyu and Huang, Huayang and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  title     = {The Silent Assistant: {NoiseQuery} as Implicit Guidance for Goal-Driven Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17618--17628},
}
AnyBimanual: Transferring Unimanual Policy for General Bimanual Manipulation: Guanxing Lu,

Tengbo Yu,

Haoyuan Deng,

Season Si Chen,

Yansong Tang,

Ziwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guanxing and Yu, Tengbo and Deng, Haoyuan and Chen, Season Si and Tang, Yansong and Wang, Ziwei},
  title     = {{AnyBimanual}: Transferring Unimanual Policy for General Bimanual Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13662--13672},
}
Training-Free Industrial Defect Generation with Diffusion Models: Ruyi Xu,

Yen-Tzu Chiu,

Tai-I Chen,

Oscar Chew,

Yung-Yu Chuang,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Ruyi and Chiu, Yen-Tzu and Chen, Tai-I and Chew, Oscar and Chuang, Yung-Yu and Cheng, Wen-Huang},
  title     = {Training-Free Industrial Defect Generation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24214--24223},
}
Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding: Minghang Zheng,

Yuxin Peng,

Benyuan Sun,

Yi Yang,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Minghang and Peng, Yuxin and Sun, Benyuan and Yang, Yi and Liu, Yang},
  title     = {Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21589--21599},
}
Spatial Preference Rewarding for MLLMs Spatial Understanding: Han Qiu,

Peng Gao,

Lewei Lu,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Han and Gao, Peng and Lu, Lewei and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {Spatial Preference Rewarding for {MLLMs} Spatial Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {720--730},
}
Correspondence as Video: Test-Time Adaption on SAM2 for Reference Segmentation in the Wild: Haoran Wang,

Zekun Li,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoran and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Correspondence as Video: Test-Time Adaption on {SAM2} for Reference Segmentation in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8177--8186},
}
Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning: Giwon Lee,

Wooseong Jeong,

Daehee Park,

Jaewoo Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Giwon and Jeong, Wooseong and Park, Daehee and Jeong, Jaewoo and Yoon, Kuk-Jin},
  title     = {Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28610--28621},
}
FreeFlux: Understanding and Exploiting Layer-Specific Roles in RoPE-Based MMDiT for Versatile Image Editing: Tianyi Wei,

Yifan Zhou,

Dongdong Chen,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Tianyi and Zhou, Yifan and Chen, Dongdong and Pan, Xingang},
  title     = {{FreeFlux}: Understanding and Exploiting Layer-Specific Roles in {RoPE}-Based {MMDiT} for Versatile Image Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16745--16754},
}
WalkVLM: Aid Visually Impaired People Walking by Vision Language Model: Zhiqiang Yuan,

Ting Zhang,

Yeshuang Zhu,

Jiapei Zhang,

Ying Deng,

Zexi Jia,

Peixiang Luo,

Xiaoyue Duan,

Jie Zhou,

Jinchao Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yuan_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhiqiang and Zhang, Ting and Zhu, Yeshuang and Zhang, Jiapei and Deng, Ying and Jia, Zexi and Luo, Peixiang and Duan, Xiaoyue and Zhou, Jie and Zhang, Jinchao},
  title     = {{WalkVLM}: Aid Visually Impaired People Walking by Vision Language Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9845--9854},
}
WildSeg3D: Segment Any 3D Objects in the Wild from 2D Images: Yansong Guo,

Jie Hu,

Yansong Qu,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yansong and Hu, Jie and Qu, Yansong and Cao, Liujuan},
  title     = {{WildSeg3D}: Segment Any {3D} Objects in the Wild from {2D} Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5166--5176},
}
CAPTURE: Evaluating Spatial Reasoning in Vision Language Models via Occluded Object Counting: Atin Pothiraj,

Elias Stengel-Eskin,

Jaemin Cho,

Mohit Bansal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pothiraj_2025_ICCV,
  author    = {Pothiraj, Atin and Stengel-Eskin, Elias and Cho, Jaemin and Bansal, Mohit},
  title     = {{CAPTURE}: Evaluating Spatial Reasoning in Vision Language Models via Occluded Object Counting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8001--8010},
}
DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy: Ming Dai,

Wenxuan Cheng,

Jiang-jiang Liu,

Sen Yang,

Wenxiao Cai,

Yanpeng Sun,

Wankou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ming and Cheng, Wenxuan and Liu, Jiang-jiang and Yang, Sen and Cai, Wenxiao and Sun, Yanpeng and Yang, Wankou},
  title     = {{DeRIS}: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19936--19946},
}
PersPose: 3D Human Pose Estimation with Perspective Encoding and Perspective Rotation: Xiaoyang Hao,

Han Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Hao_2025_ICCV,
  author    = {Hao, Xiaoyang and Li, Han},
  title     = {{PersPose}: {3D} Human Pose Estimation with Perspective Encoding and Perspective Rotation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8110--8119},
}
Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing: Joonghyuk Shin,

Alchan Hwang,

Yujin Kim,

Daneul Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Joonghyuk and Hwang, Alchan and Kim, Yujin and Kim, Daneul and Park, Jaesik},
  title     = {Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19492--19502},
}
IM-LUT: Interpolation Mixing Look-Up Tables for Image Super-Resolution: Sejin Park,

Sangmin Lee,

Kyong Hwan Jin,

Seung-Won Jung; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sejin and Lee, Sangmin and Jin, Kyong Hwan and Jung, Seung-Won},
  title     = {{IM-LUT}: Interpolation Mixing Look-Up Tables for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14317--14325},
}
TeEFusion: Blending Text Embeddings to Distill Classifier-Free Guidance: Minghao Fu,

Guo-Hua Wang,

Xiaohao Chen,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Minghao and Wang, Guo-Hua and Chen, Xiaohao and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu},
  title     = {{TeEFusion}: Blending Text Embeddings to Distill Classifier-Free Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16652--16661},
}
Scaling Omni-modal Pretraining with Multimodal Context: Advancing Universal Representation Learning Across Modalities: Yiyuan Zhang,

Handong Li,

Jing Liu,

Xiangyu Yue; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu},
  title     = {Scaling Omni-modal Pretraining with Multimodal Context: Advancing Universal Representation Learning Across Modalities},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1336--1348},
}
ZIM: Zero-Shot Image Matting for Anything: Beomyoung Kim,

Chanyong Shin,

Joonhyun Jeong,

Hyungsik Jung,

Se-Yun Lee,

Sewhan Chun,

Dong-Hyun Hwang,

Joonsang Yu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Kim_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Beomyoung and Shin, Chanyong and Jeong, Joonhyun and Jung, Hyungsik and Lee, Se-Yun and Chun, Sewhan and Hwang, Dong-Hyun and Yu, Joonsang},
  title     = {{ZIM}: Zero-Shot Image Matting for Anything},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23828--23838},
}
LayerD: Decomposing Raster Graphic Designs into Layers: Tomoyuki Suzuki,

Kang-Jun Liu,

Naoto Inoue,

Kota Yamaguchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Suzuki_2025_ICCV,
  author    = {Suzuki, Tomoyuki and Liu, Kang-Jun and Inoue, Naoto and Yamaguchi, Kota},
  title     = {{LayerD}: Decomposing Raster Graphic Designs into Layers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17783--17792},
}
ALOcc: Adaptive Lifting-Based 3D Semantic Occupancy and Cost Volume-Based Flow Predictions: Dubing Chen,

Jin Fang,

Wencheng Han,

Xinjing Cheng,

Junbo Yin,

Chengzhong Xu,

Fahad Shahbaz Khan,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Dubing and Fang, Jin and Han, Wencheng and Cheng, Xinjing and Yin, Junbo and Xu, Chengzhong and Khan, Fahad Shahbaz and Shen, Jianbing},
  title     = {{ALOcc}: Adaptive Lifting-Based {3D} Semantic Occupancy and Cost Volume-Based Flow Predictions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4156--4166},
}
Heuristic-Induced Multimodal Risk Distribution Jailbreak Attack for Multimodal Large Language Models: Teng Ma,

Xiaojun Jia,

Ranjie Duan,

Xinfeng Li,

Yihao Huang,

Xiaoshuang Jia,

Zhixuan Chu,

Wenqi Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Teng and Jia, Xiaojun and Duan, Ranjie and Li, Xinfeng and Huang, Yihao and Jia, Xiaoshuang and Chu, Zhixuan and Ren, Wenqi},
  title     = {Heuristic-Induced Multimodal Risk Distribution Jailbreak Attack for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2686--2696},
}
Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping: Weili Zeng,

Ziyuan Huang,

Kaixiang Ji,

Yichao Yan; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Weili and Huang, Ziyuan and Ji, Kaixiang and Yan, Yichao},
  title     = {{Skip-Vision}: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21384--21397},
}
Enrich and Detect: Video Temporal Grounding with Multimodal LLMs: Shraman Pramanick,

Effrosyni Mavroudi,

Yale Song,

Rama Chellappa,

Lorenzo Torresani,

Triantafyllos Afouras; [pdf] [supp]
[bibtex]
@InProceedings{Pramanick_2025_ICCV,
  author    = {Pramanick, Shraman and Mavroudi, Effrosyni and Song, Yale and Chellappa, Rama and Torresani, Lorenzo and Afouras, Triantafyllos},
  title     = {Enrich and Detect: Video Temporal Grounding with Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24297--24308},
}
Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation: Jian Wang,

Tianhong Dai,

Bingfeng Zhang,

Siyue Yu,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin},
  title     = {Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21645--21654},
}
DexVLG: Dexterous Vision-Language-Grasp Model at Scale: Jiawei He,

Danshi Li,

Xinqiang Yu,

Zekun Qi,

Wenyao Zhang,

Jiayi Chen,

Zhaoxiang Zhang,

Zhizheng Zhang,

Li Yi,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jiawei and Li, Danshi and Yu, Xinqiang and Qi, Zekun and Zhang, Wenyao and Chen, Jiayi and Zhang, Zhaoxiang and Zhang, Zhizheng and Yi, Li and Wang, He},
  title     = {{DexVLG}: Dexterous Vision-Language-Grasp Model at Scale},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14248--14258},
}
FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data: Yiting Li,

Fayao Liu,

Jingyi Liao,

Sichao Tian,

Chuan-Sheng Foo,

Xulei Yang; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yiting and Liu, Fayao and Liao, Jingyi and Tian, Sichao and Foo, Chuan-Sheng and Yang, Xulei},
  title     = {{FIND}: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23290--23299},
}
MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation: Bin Xie,

Hao Tang,

Bin Duan,

Dawen Cai,

Yan Yan,

Gady Agam; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Bin and Tang, Hao and Duan, Bin and Cai, Dawen and Yan, Yan and Agam, Gady},
  title     = {{MaskSAM}: Auto-prompt {SAM} with Mask Classification for Volumetric Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24423--24433},
}
Boundary Probing for Input Privacy Protection When Using LMM Services: Xiaofei Hui,

Haoxuan Qu,

Ping Hu,

Hossein Rahmani,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Hui_2025_ICCV,
  author    = {Hui, Xiaofei and Qu, Haoxuan and Hu, Ping and Rahmani, Hossein and Liu, Jun},
  title     = {Boundary Probing for Input Privacy Protection When Using {LMM} Services},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {467--477},
}
Knowledge Distillation with Refined Logits: Wujie Sun,

Defang Chen,

Siwei Lyu,

Genlang Chen,

Chun Chen,

Can Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Sun_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Wujie and Chen, Defang and Lyu, Siwei and Chen, Genlang and Chen, Chun and Wang, Can},
  title     = {Knowledge Distillation with Refined Logits},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1110--1119},
}
Tracking Tiny Drones against Clutter: Large-Scale Infrared Benchmark with Motion-Centric Adaptive Algorithm: Jiahao Zhang,

Zongli Jiang,

Jinli Zhang,

Yixin Wei,

Liang Li,

Yizheng Wang,

Gang Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiahao and Jiang, Zongli and Zhang, Jinli and Wei, Yixin and Li, Liang and Wang, Yizheng and Wang, Gang},
  title     = {Tracking Tiny Drones against Clutter: Large-Scale Infrared Benchmark with Motion-Centric Adaptive Algorithm},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7361--7371},
}
AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction: Zhen Xing,

Qi Dai,

Zejia Weng,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_ICCV,
  author    = {Xing, Zhen and Dai, Qi and Weng, Zejia and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{AID}: Adapting {Image2Video} Diffusion Models for Instruction-guided Video Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21243--21253},
}
Enhancing Few-Shot Vision-Language Classification with Large Multimodal Model Features: Chancharik Mitra,

Brandon Huang,

Tianning Chai,

Zhiqiu Lin,

Assaf Arbelle,

Rogerio Feris,

Leonid Karlinsky,

Trevor Darrell,

Deva Ramanan,

Roei Herzig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mitra_2025_ICCV,
  author    = {Mitra, Chancharik and Huang, Brandon and Chai, Tianning and Lin, Zhiqiu and Arbelle, Assaf and Feris, Rogerio and Karlinsky, Leonid and Darrell, Trevor and Ramanan, Deva and Herzig, Roei},
  title     = {Enhancing Few-Shot Vision-Language Classification with Large Multimodal Model Features},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2760--2772},
}
DeepShield: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis: Yinqi Cai,

Jichang Li,

Zhaolun Li,

Weikai Chen,

Rushi Lan,

Xi Xie,

Xiaonan Luo,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yinqi and Li, Jichang and Li, Zhaolun and Chen, Weikai and Lan, Rushi and Xie, Xi and Luo, Xiaonan and Li, Guanbin},
  title     = {{DeepShield}: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12524--12534},
}
Synthesizing Near-Boundary OOD Samples for Out-of-Distribution Detection: Jinglun Li,

Kaixun Jiang,

Zhaoyu Chen,

Bo Lin,

Yao Tang,

Weifeng Ge,

Wenqiang Zhang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jinglun and Jiang, Kaixun and Chen, Zhaoyu and Lin, Bo and Tang, Yao and Ge, Weifeng and Zhang, Wenqiang},
  title     = {Synthesizing Near-Boundary {OOD} Samples for Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4496--4506},
}
Splat-based 3D Scene Reconstruction with Extreme Motion-blur: Hyeonjoong Jang,

Dongyoung Choi,

Donggun Kim,

Woohyun Kang,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, Hyeonjoong and Choi, Dongyoung and Kim, Donggun and Kang, Woohyun and Kim, Min H.},
  title     = {Splat-based {3D} Scene Reconstruction with Extreme Motion-blur},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26425--26434},
}
Towards Robustness of Person Search against Corruptions: Woojung Son,

Yoonki Cho,

Guoyuan An,

Chanmi Lee,

Sung-Eui Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Son_2025_ICCV,
  author    = {Son, Woojung and Cho, Yoonki and An, Guoyuan and Lee, Chanmi and Yoon, Sung-Eui},
  title     = {Towards Robustness of Person Search against Corruptions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23408--23418},
}
INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception: Yunjiang Xu,

Lingzhi Li,

Jin Wang,

Yupeng Ouyang,

Benyuan Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yunjiang and Li, Lingzhi and Wang, Jin and Ouyang, Yupeng and Yang, Benyuan},
  title     = {{INSTINCT}: Instance-Level Interaction Architecture for Query-Based Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25464--25473},
}
DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving: Chen Shi,

Shaoshuai Shi,

Kehua Sheng,

Bo Zhang,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Chen and Shi, Shaoshuai and Sheng, Kehua and Zhang, Bo and Jiang, Li},
  title     = {{DriveX}: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28599--28609},
}
MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency: Xingbo Yao,

Xuanmin Wang,

Hao Wu,

Chengliang Ping,

Doudou Zhang,

Hui Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Xingbo and Wang, Xuanmin and Wu, Hao and Ping, Chengliang and Zhang, Doudou and Xiong, Hui},
  title     = {{MagicCity}: Geometry-Aware {3D} City Generation from Satellite Imagery with Multi-View Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25325--25334},
}
EDFFDNet: Towards Accurate and Efficient Unsupervised Multi-Grid Image Registration: Haokai Zhu,

Bo Qu,

Si-Yuan Cao,

Runmin Zhang,

Shujie Chen,

Bailin Yang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haokai and Qu, Bo and Cao, Si-Yuan and Zhang, Runmin and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang},
  title     = {{EDFFDNet}: Towards Accurate and Efficient Unsupervised Multi-Grid Image Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5102--5111},
}
RadGPT: Constructing 3D Image-Text Tumor Datasets: Pedro R.A.S. Bassi,

Mehmet Can Yavuz,

Ibrahim Ethem Hamamci,

Sezgin Er,

Xiaoxi Chen,

Wenxuan Li,

Bjoern Menze,

Sergio Decherchi,

Andrea Cavalli,

Kang Wang,

Yang Yang,

Alan Yuille,

Zongwei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Bassi_2025_ICCV,
  author    = {Bassi, Pedro R. A. S. and Yavuz, Mehmet Can and Hamamci, Ibrahim Ethem and Er, Sezgin and Chen, Xiaoxi and Li, Wenxuan and Menze, Bjoern and Decherchi, Sergio and Cavalli, Andrea and Wang, Kang and Yang, Yang and Yuille, Alan and Zhou, Zongwei},
  title     = {{RadGPT}: Constructing {3D} Image-Text Tumor Datasets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23720--23730},
}
Adaptive Prompt Learning via Gaussian Outlier Synthesis for Out-of-distribution Detection: Yongkang Zhang,

Dongyu She,

Zhong Zhou; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yongkang and She, Dongyu and Zhou, Zhong},
  title     = {Adaptive Prompt Learning via {Gaussian} Outlier Synthesis for Out-of-distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3235--3244},
}
LangBridge: Interpreting Image as a Combination of Language Embeddings: Jiaqi Liao,

Yuwei Niu,

Fanqing Meng,

Hao Li,

Changyao Tian,

Yinuo Du,

Yuwen Xiong,

Dianqi Li,

Xizhou Zhu,

Li Yuan,

Jifeng Dai,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Jiaqi and Niu, Yuwei and Meng, Fanqing and Li, Hao and Tian, Changyao and Du, Yinuo and Xiong, Yuwen and Li, Dianqi and Zhu, Xizhou and Yuan, Li and Dai, Jifeng and Cheng, Yu},
  title     = {{LangBridge}: Interpreting Image as a Combination of Language Embeddings},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23752--23762},
}
Anchor Token Matching: Implicit Structure Locking for Training-free AR Image Editing: Taihang Hu,

Linxuan Li,

Kai Wang,

Yaxing Wang,

Jian Yang,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Taihang and Li, Linxuan and Wang, Kai and Wang, Yaxing and Yang, Jian and Cheng, Ming-Ming},
  title     = {Anchor Token Matching: Implicit Structure Locking for Training-free {AR} Image Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18166--18176},
}
ArgoTweak: Towards Self-Updating HD Maps through Structured Priors: Lena Wild,

Rafael Valencia,

Patric Jensfelt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wild_2025_ICCV,
  author    = {Wild, Lena and Valencia, Rafael and Jensfelt, Patric},
  title     = {{ArgoTweak}: Towards Self-Updating {HD} Maps through Structured Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6091--6100},
}
FedMVP: Federated Multimodal Visual Prompt Tuning for Vision-Language Models: Mainak Singha,

Subhankar Roy,

Sarthak Mehrotra,

Ankit Jha,

Moloud Abdar,

Biplab Banerjee,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singha_2025_ICCV,
  author    = {Singha, Mainak and Roy, Subhankar and Mehrotra, Sarthak and Jha, Ankit and Abdar, Moloud and Banerjee, Biplab and Ricci, Elisa},
  title     = {{FedMVP}: Federated Multimodal Visual Prompt Tuning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17869--17878},
}
Robust Dataset Condensation using Supervised Contrastive Learning: Nicole Hee-Yeon Kim,

Hwanjun Song; [pdf] [supp]
[bibtex]
% NOTE(review): key Kim_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Nicole Hee-Yeon and Song, Hwanjun},
  title     = {Robust Dataset Condensation using Supervised Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2857--2866},
}
TIP-I2V: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation: Wenhao Wang,

Yi Yang; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenhao and Yang, Yi},
  title     = {{TIP-I2V}: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14898--14908},
}
From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers: Jiacheng Liu,

Chang Zou,

Yuanhuiyi Lyu,

Junjie Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiacheng and Zou, Chang and Lyu, Yuanhuiyi and Chen, Junjie and Zhang, Linfeng},
  title     = {From Reusing to Forecasting: Accelerating Diffusion Models with {TaylorSeers}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15853--15863},
}
VisNumBench: Evaluating Number Sense of Multimodal Large Language Models: Tengjin Weng,

Jingyi Wang,

Wenhao Jiang,

Zhong Ming; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2025_ICCV,
  author    = {Weng, Tengjin and Wang, Jingyi and Jiang, Wenhao and Ming, Zhong},
  title     = {{VisNumBench}: Evaluating Number Sense of Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3830--3840},
}
Invisible Watermarks, Visible Gains: Steering Machine Unlearning with Bi-Level Watermarking Design: Yuhao Sun,

Yihua Zhang,

Gaowen Liu,

Hongtao Xie,

Sijia Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Sun_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Yuhao and Zhang, Yihua and Liu, Gaowen and Xie, Hongtao and Liu, Sijia},
  title     = {Invisible Watermarks, Visible Gains: Steering Machine Unlearning with Bi-Level Watermarking Design},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2417--2428},
}
FICGen: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation: Wenzhuang Wang,

Yifan Zhao,

Mingcan Ma,

Ming Liu,

Zhonglin Jiang,

Yong Chen,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenzhuang and Zhao, Yifan and Ma, Mingcan and Liu, Ming and Jiang, Zhonglin and Chen, Yong and Li, Jia},
  title     = {{FICGen}: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19097--19107},
}
Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization: Bingqing Zhang,

Zhuo Cao,

Heming Du,

Yang Li,

Xue Li,

Jiajun Liu,

Sen Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; disambiguate before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Bingqing and Cao, Zhuo and Du, Heming and Li, Yang and Li, Xue and Liu, Jiajun and Wang, Sen},
  title     = {Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22120--22130},
}
ShortV: Efficient Multimodal Large Language Models by Freezing Visual Tokens in Ineffective Layers: Qianhao Yuan,

Qingyu Zhang,

Yanjiang Liu,

Jiawei Chen,

Yaojie Lu,

Hongyu Lin,

Jia Zheng,

Xianpei Han,

Le Sun; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Yuan_2025_ICCV is shared by another entry in this listing; disambiguate before merging into one .bib file.
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Qianhao and Zhang, Qingyu and Liu, Yanjiang and Chen, Jiawei and Lu, Yaojie and Lin, Hongyu and Zheng, Jia and Han, Xianpei and Sun, Le},
  title     = {{ShortV}: Efficient Multimodal Large Language Models by Freezing Visual Tokens in Ineffective Layers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {329--339},
}
SpikeDiff: Zero-shot High-Quality Video Reconstruction from Chromatic Spike Camera and Sub-millisecond Spike Streams: Siqi Yang,

Jinxiu Liang,

Zhaojun Huang,

Yeliduosi Xiaokaiti,

Yakun Chang,

Zhaofei Yu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Siqi and Liang, Jinxiu and Huang, Zhaojun and Xiaokaiti, Yeliduosi and Chang, Yakun and Yu, Zhaofei and Shi, Boxin},
  title     = {{SpikeDiff}: Zero-shot High-Quality Video Reconstruction from Chromatic Spike Camera and Sub-millisecond Spike Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7905--7914}
}
Selective Contrastive Learning for Weakly Supervised Affordance Grounding: WonJun Moon,

Hyun Seok Seong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil},
  title     = {Selective Contrastive Learning for Weakly Supervised Affordance Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5210--5220}
}
GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology: Saarthak Kapse,

Pushpak Pati,

Srikar Yellapragada,

Srijan Das,

Rajarsi R. Gupta,

Joel Saltz,

Dimitris Samaras,

Prateek Prasanna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kapse_2025_ICCV,
  author    = {Kapse, Saarthak and Pati, Pushpak and Yellapragada, Srikar and Das, Srijan and Gupta, Rajarsi R. and Saltz, Joel and Samaras, Dimitris and Prasanna, Prateek},
  title     = {{GECKO}: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20020--20030}
}
Single-Scanline Relative Pose Estimation for Rolling Shutter Cameras: Petr Hruby,

Marc Pollefeys; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hruby_2025_ICCV,
  author    = {Hruby, Petr and Pollefeys, Marc},
  title     = {Single-Scanline Relative Pose Estimation for Rolling Shutter Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7143--7153}
}
Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs: Soonbin Lee,

Fangwen Shu,

Yago Sanchez,

Thomas Schierl,

Cornelius Hellge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Soonbin and Shu, Fangwen and Sanchez, Yago and Schierl, Thomas and Hellge, Cornelius},
  title     = {Compression of {3D} {Gaussian} Splatting with Optimized Feature Planes and Standard Video Codecs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25496--25505}
}
CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering: Xinyi Zheng,

Steve Zhang,

Weizhe Lin,

Aaron Zhang,

Walterio W. Mayol-Cuevas,

Yunze Liu,

Junxiao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Xinyi and Zhang, Steve and Lin, Weizhe and Zhang, Aaron and Mayol-Cuevas, Walterio W. and Liu, Yunze and Shen, Junxiao},
  title     = {{CULTURE3D}: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for {Gaussian}-Based Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29064--29074}
}
Real3D: Towards Scaling Large Reconstruction Models with Real Images: Hanwen Jiang,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Hanwen and Huang, Qixing and Pavlakos, Georgios},
  title     = {{Real3D}: Towards Scaling Large Reconstruction Models with Real Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5821--5833}
}
FA: Forced Prompt Learning of Vision-Language Models for Out-of-Distribution Detection: Xinhua Lu,

Runhe Lai,

Yanqi Wu,

Kanghao Chen,

Wei-Shi Zheng,

Ruixuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Xinhua and Lai, Runhe and Wu, Yanqi and Chen, Kanghao and Zheng, Wei-Shi and Wang, Ruixuan},
  title     = {{FA}: Forced Prompt Learning of Vision-Language Models for Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1152--1161}
}
Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction: Zeren Jiang,

Chuanxia Zheng,

Iro Laina,

Diane Larlus,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea},
  title     = {{Geo4D}: Leveraging Video Generators for Geometric {4D} Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20658--20671}
}
REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment: Haonan Han,

Rui Yang,

Huan Liao,

Jiankai Xing,

Zunnan Xu,

Xiaoming Yu,

Junwei Zha,

Xiu Li,

Wanhua Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Haonan and Yang, Rui and Liao, Huan and Xing, Jiankai and Xu, Zunnan and Yu, Xiaoming and Zha, Junwei and Li, Xiu and Li, Wanhua},
  title     = {{REPARO}: Compositional {3D} Assets Generation with Differentiable {3D} Layout Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25367--25377}
}
VACE: All-in-One Video Creation and Editing: Zeyinzi Jiang,

Zhen Han,

Chaojie Mao,

Jingfeng Zhang,

Yulin Pan,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zeyinzi and Han, Zhen and Mao, Chaojie and Zhang, Jingfeng and Pan, Yulin and Liu, Yu},
  title     = {{VACE}: All-in-One Video Creation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17191--17202}
}
SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs: Jiahui Wang,

Zuyan Liu,

Yongming Rao,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiahui and Liu, Zuyan and Rao, Yongming and Lu, Jiwen},
  title     = {{SparseMM}: Head Sparsity Emerges from Visual Concept Responses in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23177--23187}
}
Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array: Hongyi Zhang,

Laurie Bose,

Jianing Chen,

Piotr Dudek,

Walterio Mayol-Cuevas; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hongyi and Bose, Laurie and Chen, Jianing and Dudek, Piotr and Mayol-Cuevas, Walterio},
  title     = {Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29031--29039}
}
Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs: Shaojie Zhang,

Jiahui Yang,

Jianqin Yin,

Zhenbo Luo,

Jian Luan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shaojie and Yang, Jiahui and Yin, Jianqin and Luo, Zhenbo and Luan, Jian},
  title     = {{Q-Frame}: Query-aware Frame Selection and Multi-Resolution Adaptation for {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22056--22065}
}
Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion: Yuan Bian,

Min Liu,

Yunqi Yi,

Xueping Wang,

Shuai Jiang,

Yaonan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Yuan and Liu, Min and Yi, Yunqi and Wang, Xueping and Jiang, Shuai and Wang, Yaonan},
  title     = {Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22599--22609}
}
OV-SCAN: Semantically Consistent Alignment for Novel Object Discovery in Open-Vocabulary 3D Object Detection: Adrian Chow,

Evelien Riddell,

Yimu Wang,

Sean Sedwards,

Krzysztof Czarnecki; [pdf] [supp]
[bibtex]
@InProceedings{Chow_2025_ICCV,
  author    = {Chow, Adrian and Riddell, Evelien and Wang, Yimu and Sedwards, Sean and Czarnecki, Krzysztof},
  title     = {{OV-SCAN}: Semantically Consistent Alignment for Novel Object Discovery in Open-Vocabulary {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7990--8000}
}
ContextFace: Generating Facial Expressions from Emotional Contexts: Min-jung Kim,

Minsang Kim,

Seung Jun Baek; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min-jung and Kim, Minsang and Baek, Seung Jun},
  title     = {{ContextFace}: Generating Facial Expressions from Emotional Contexts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11383--11392}
}
MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching: Eunjin Son,

HyungGi Jo,

Wookyong Kwon,

Sang Jun Lee; [pdf]
[bibtex]
@InProceedings{Son_2025_ICCV,
  author    = {Son, Eunjin and Jo, HyungGi and Kwon, Wookyong and Lee, Sang Jun},
  title     = {{MDP-Omni}: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26178--26187}
}
The Inter-Intra Modal Measure: A Predictive Lens on Fine-Tuning Outcomes in Vision-Language Models: Laura Niss,

Kevin Vogt-Lowell,

Theodoros Tsiligkaridis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niss_2025_ICCV,
  author    = {Niss, Laura and Vogt-Lowell, Kevin and Tsiligkaridis, Theodoros},
  title     = {The Inter-Intra Modal Measure: A Predictive Lens on Fine-Tuning Outcomes in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2396--2406}
}
Error Recognition in Procedural Videos using Generalized Task Graph: Shih-Po Lee,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Shih-Po and Elhamifar, Ehsan},
  title     = {Error Recognition in Procedural Videos using Generalized Task Graph},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10009--10021}
}
Reangle-A-Video: 4D Video Generation as Video-to-Video Translation: Hyeonho Jeong,

Suhyeon Lee,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Hyeonho and Lee, Suhyeon and Ye, Jong Chul},
  title     = {{Reangle-A-Video}: {4D} Video Generation as Video-to-Video Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11164--11175}
}
DualReal: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization: Wenchuan Wang,

Mengqi Huang,

Yijing Tu,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenchuan and Huang, Mengqi and Tu, Yijing and Mao, Zhendong},
  title     = {{DualReal}: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16565--16575}
}
RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case: Baihui Xiao,

Chengjian Feng,

Zhijian Huang,

Feng Yan,

Yujie Zhong,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Baihui and Feng, Chengjian and Huang, Zhijian and Yan, Feng and Zhong, Yujie and Ma, Lin},
  title     = {{RoboTron-Sim}: Improving Real-World Driving via Simulated Hard-Case},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27380--27389}
}
Towards Scalable Spatial Intelligence via 2D-to-3D Data Lifting: Xingyu Miao,

Haoran Duan,

Quanhao Qian,

Jiuniu Wang,

Yang Long,

Ling Shao,

Deli Zhao,

Ran Xu,

Gongjie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Xingyu and Duan, Haoran and Qian, Quanhao and Wang, Jiuniu and Long, Yang and Shao, Ling and Zhao, Deli and Xu, Ran and Zhang, Gongjie},
  title     = {Towards Scalable Spatial Intelligence via {2D-to-3D} Data Lifting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {945--959}
}
LDIP: Long Distance Information Propagation for Video Super-Resolution: Michael Bernasconi,

Abdelaziz Djelouah,

Yang Zhang,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Bernasconi_2025_ICCV,
  author    = {Bernasconi, Michael and Djelouah, Abdelaziz and Zhang, Yang and Gross, Markus and Schroers, Christopher},
  title     = {{LDIP}: Long Distance Information Propagation for Video Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11558--11567}
}
Cross-Architecture Distillation Made Simple with Redundancy Suppression: Weijia Zhang,

Yuehao Liu,

Wu Ran,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weijia and Liu, Yuehao and Ran, Wu and Ma, Chao},
  title     = {Cross-Architecture Distillation Made Simple with Redundancy Suppression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23256--23266}
}
M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking: Yan Li,

Yang Xu,

Changhao Chen,

Zhongchen Shi,

Wei Chen,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yan and Xu, Yang and Chen, Changhao and Shi, Zhongchen and Chen, Wei and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {{M2EIT}: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28207--28216}
}
ShortFT: Diffusion Model Alignment via Shortcut-based Fine-Tuning: Xiefan Guo,

Miaomiao Cui,

Liefeng Bo,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Xiefan and Cui, Miaomiao and Bo, Liefeng and Huang, Di},
  title     = {{ShortFT}: Diffusion Model Alignment via Shortcut-based Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {678--687}
}
Who Controls the Authorization? Invertible Networks for Copyright Protection in Text-to-Image Synthesis: Baoyue Hu,

Yang Wei,

Junhao Xiao,

Wendong Huang,

Xiuli Bi,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Baoyue and Wei, Yang and Xiao, Junhao and Huang, Wendong and Bi, Xiuli and Xiao, Bin},
  title     = {Who Controls the Authorization? Invertible Networks for Copyright Protection in Text-to-Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15832--15841}
}
Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation: SungMin Jang,

Wonjun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, SungMin and Kim, Wonjun},
  title     = {Identity-aware Language {Gaussian} Splatting for Open-vocabulary {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20467--20476}
}
IDEATOR: Jailbreaking and Benchmarking Large Vision-Language Models Using Themselves: Ruofan Wang,

Juncheng Li,

Yixu Wang,

Bo Wang,

Xiaosen Wang,

Yan Teng,

Yingchun Wang,

Xingjun Ma,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruofan and Li, Juncheng and Wang, Yixu and Wang, Bo and Wang, Xiaosen and Teng, Yan and Wang, Yingchun and Ma, Xingjun and Jiang, Yu-Gang},
  title     = {{IDEATOR}: Jailbreaking and Benchmarking Large Vision-Language Models Using Themselves},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8875--8884}
}
Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography: Jianing Zhang,

Jiayi Zhu,

Feiyu Ji,

Xiaokang Yang,

Xiaoyun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jianing and Zhu, Jiayi and Ji, Feiyu and Yang, Xiaokang and Yuan, Xiaoyun},
  title     = {Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25914--25924}
}
Driving View Synthesis on Free-form Trajectories with Generative Prior: Zeyu Yang,

Zijie Pan,

Yuankun Yang,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zeyu and Pan, Zijie and Yang, Yuankun and Zhu, Xiatian and Zhang, Li},
  title     = {Driving View Synthesis on Free-form Trajectories with Generative Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28083--28092}
}
AnimeGamer: Infinite Anime Life Simulation with Next Game State Prediction: Junhao Cheng,

Yuying Ge,

Yixiao Ge,

Jing Liao,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Junhao and Ge, Yuying and Ge, Yixiao and Liao, Jing and Shan, Ying},
  title     = {{AnimeGamer}: Infinite Anime Life Simulation with Next Game State Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10875--10885}
}
OmniPaint: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting: Yongsheng Yu,

Ziyun Zeng,

Haitian Zheng,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yongsheng and Zeng, Ziyun and Zheng, Haitian and Luo, Jiebo},
  title     = {{OmniPaint}: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17324--17334}
}
UIPro: Unleashing Superior Interaction Capability For GUI Agents: Hongxin Li,

Jingran Su,

Jingfan Chen,

Zheng Ju,

Yuntao Chen,

Qing Li,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hongxin and Su, Jingran and Chen, Jingfan and Ju, Zheng and Chen, Yuntao and Li, Qing and Zhang, Zhaoxiang},
  title     = {{UIPro}: Unleashing Superior Interaction Capability For {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1613--1623}
}
Class-Wise Federated Averaging for Efficient Personalization: Gyuejeong Lee,

Daeyoung Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Gyuejeong and Choi, Daeyoung},
  title     = {Class-Wise Federated Averaging for Efficient Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1773--1782}
}
StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting: Shakiba Kheradmand,

Delio Vicini,

George Kopanas,

Dmitry Lagun,

Kwang Moo Yi,

Mark Matthews,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kheradmand_2025_ICCV,
  author    = {Kheradmand, Shakiba and Vicini, Delio and Kopanas, George and Lagun, Dmitry and Yi, Kwang Moo and Matthews, Mark and Tagliasacchi, Andrea},
  title     = {{StochasticSplats}: Stochastic Rasterization for Sorting-Free {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26326--26335}
}
A0: An Affordance-Aware Hierarchical Model for General Robotic Manipulation: Rongtao Xu,

Jian Zhang,

Minghao Guo,

Youpeng Wen,

Haoting Yang,

Min Lin,

Jianzheng Huang,

Zhe Li,

Kaidong Zhang,

Liqiong Wang,

Yuxuan Kuang,

Meng Cao,

Feng Zheng,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Rongtao and Zhang, Jian and Guo, Minghao and Wen, Youpeng and Yang, Haoting and Lin, Min and Huang, Jianzheng and Li, Zhe and Zhang, Kaidong and Wang, Liqiong and Kuang, Yuxuan and Cao, Meng and Zheng, Feng and Liang, Xiaodan},
  title     = {{A0}: An Affordance-Aware Hierarchical Model for General Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13491--13501}
}
FinMMR: Make Financial Numerical Reasoning More Multimodal, Comprehensive, and Challenging: Zichen Tang,

Haihong E,

Jiacheng Liu,

Zhongjun Yang,

Rongjin Li,

Zihua Rong,

Haoyang He,

Zhuodi Hao,

Xinyang Hu,

Kun Ji,

Ziyan Ma,

Mengyuan Ji,

Jun Zhang,

Chenghao Ma,

Qianhe Zheng,

Yang Liu,

Yiling Huang,

Xinyi Hu,

Qing Huang,

Zijian Xie,

Shiyao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Zichen and E, Haihong and Liu, Jiacheng and Yang, Zhongjun and Li, Rongjin and Rong, Zihua and He, Haoyang and Hao, Zhuodi and Hu, Xinyang and Ji, Kun and Ma, Ziyan and Ji, Mengyuan and Zhang, Jun and Ma, Chenghao and Zheng, Qianhe and Liu, Yang and Huang, Yiling and Hu, Xinyi and Huang, Qing and Xie, Zijian and Peng, Shiyao},
  title     = {{FinMMR}: Make Financial Numerical Reasoning More Multimodal, Comprehensive, and Challenging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3245--3257}
}
A Tiny Change, A Giant Leap: Long-Tailed Class-Incremental Learning via Geometric Prototype Alignment: Xinyi Lai,

Luojun Lin,

Weijie Chen,

Yuanlong Yu; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_ICCV,
  author    = {Lai, Xinyi and Lin, Luojun and Chen, Weijie and Yu, Yuanlong},
  title     = {A Tiny Change, A Giant Leap: Long-Tailed Class-Incremental Learning via Geometric Prototype Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1444--1453}
}
X-Prompt: Generalizable Auto-Regressive Visual Learning with In-Context Prompting: Zeyi Sun,

Ziyang Chu,

Pan Zhang,

Tong Wu,

Yuhang Zang,

Xiaoyi Dong,

Yuanjun Xiong,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zeyi and Chu, Ziyang and Zhang, Pan and Wu, Tong and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{X-Prompt}: Generalizable Auto-Regressive Visual Learning with In-Context Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17268--17280}
}
HORT: Monocular Hand-held Objects Reconstruction with Transformers: Zerui Chen,

Rolandos Alexandros Potamias,

Shizhe Chen,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zerui and Potamias, Rolandos Alexandros and Chen, Shizhe and Schmid, Cordelia},
  title     = {{HORT}: Monocular Hand-held Objects Reconstruction with Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6046--6057}
}
Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing: Joowon Kim,

Ziseok Lee,

Donghyeon Cho,

Sanghyun Jo,

Yeonsung Jung,

Kyungsu Kim,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Joowon and Lee, Ziseok and Cho, Donghyeon and Jo, Sanghyun and Jung, Yeonsung and Kim, Kyungsu and Yang, Eunho},
  title     = {Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18844--18854}
}
Identity Preserving 3D Head Stylization with Multiview Score Distillation: Bahri Batuhan Bilecen,

Ahmet Berke Gökmen,

Furkan Guzelant,

Aysegul Dundar; [pdf] [supp]
[bibtex]
@InProceedings{Bilecen_2025_ICCV,
  author    = {Bilecen, Bahri Batuhan and G{\"o}kmen, Ahmet Berke and Guzelant, Furkan and Dundar, Aysegul},
  title     = {Identity Preserving {3D} Head Stylization with Multiview Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12169--12179}
}
Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching: Yang Liu,

Wentao Feng,

Zhuoyao Liu,

Shudong Huang,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Feng, Wentao and Liu, Zhuoyao and Huang, Shudong and Lv, Jiancheng},
  title     = {Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21679--21688}
}
A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision: Chensheng Peng,

Ido Sobol,

Masayoshi Tomizuka,

Kurt Keutzer,

Chenfeng Xu,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Chensheng and Sobol, Ido and Tomizuka, Masayoshi and Keutzer, Kurt and Xu, Chenfeng and Litany, Or},
  title     = {A Lesson in Splats: Teacher-Guided Diffusion for {3D} {Gaussian} Splats Generation with {2D} Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28707--28717}
}
Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training: Weiwei Cao,

Jianpeng Zhang,

Zhongyi Shui,

Sinuo Wang,

Zeli Chen,

Xi Li,

Le Lu,

Xianghua Ye,

Qi Zhang,

Tingbo Liang,

Ling Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Weiwei and Zhang, Jianpeng and Shui, Zhongyi and Wang, Sinuo and Chen, Zeli and Li, Xi and Lu, Le and Ye, Xianghua and Zhang, Qi and Liang, Tingbo and Zhang, Ling},
  title     = {Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23041--23050}
}
GAP: Gaussianize Any Point Clouds with Text Guidance: Weiqi Zhang,

Junsheng Zhou,

Haotian Geng,

Wenyuan Zhang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Zhang, Wenyuan and Liu, Yu-Shen},
  title     = {{GAP}: Gaussianize Any Point Clouds with Text Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25627--25638}
}
Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow: Ruyang Liu,

Shangkun Sun,

Haoran Tang,

Wei Gao,

Ge Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ruyang and Sun, Shangkun and Tang, Haoran and Gao, Wei and Li, Ge},
  title     = {{Flow4Agent}: Long-form Video Understanding via Motion Prior from Optical Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23817--23827}
}
Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos: Yi Chen,

Yuying Ge,

Weiliang Tang,

Yizhuo Li,

Yixiao Ge,

Mingyu Ding,

Ying Shan,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yi and Ge, Yuying and Tang, Weiliang and Li, Yizhuo and Ge, Yixiao and Ding, Mingyu and Shan, Ying and Liu, Xihui},
  title     = {{Moto}: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19752--19763}
}
Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis: Lei-Lei Li,

Jianwu Fang,

Junbin Xiao,

Shanmin Pang,

Hongkai Yu,

Chen Lv,

Jianru Xue,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Lei-Lei and Fang, Jianwu and Xiao, Junbin and Pang, Shanmin and Yu, Hongkai and Lv, Chen and Xue, Jianru and Chua, Tat-Seng},
  title     = {Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11208--11218}
}
BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting: Zipei Ma,

Junzhe Jiang,

Yurui Chen,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Zipei and Jiang, Junzhe and Chen, Yurui and Zhang, Li},
  title     = {{BezierGS}: Dynamic Urban Scene Reconstruction with {Bezier} Curve {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25519--25528}
}
RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control: Teng Li,

Guangcong Zheng,

Rui Jiang,

Shuigen Zhan,

Tao Wu,

Yehao Lu,

Yining Lin,

Chuanyun Deng,

Yepan Xiong,

Min Chen,

Lin Cheng,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Teng and Zheng, Guangcong and Jiang, Rui and Zhan, Shuigen and Wu, Tao and Lu, Yehao and Lin, Yining and Deng, Chuanyun and Xiong, Yepan and Chen, Min and Cheng, Lin and Li, Xi},
  title     = {{RealCam-I2V}: Real-World Image-to-Video Generation with Interactive Complex Camera Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28785--28796}
}
Breaking the Encoder Barrier for Seamless Video-Language Understanding: Handong Li,

Yiyuan Zhang,

Longteng Guo,

Xiangyu Yue,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Handong and Zhang, Yiyuan and Guo, Longteng and Yue, Xiangyu and Liu, Jing},
  title     = {Breaking the Encoder Barrier for Seamless Video-Language Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23167--23176}
}
Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation: Fengchen He,

Dayang Zhao,

Hao Xu,

Tingwei Quan,

Shaoqun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Fengchen and Zhao, Dayang and Xu, Hao and Quan, Tingwei and Zeng, Shaoqun},
  title     = {Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26106--26115},
}
How Would It Sound? Material-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes: Mahnoor Fatima Saad,

Ziad Al-Halah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saad_2025_ICCV,
  author    = {Saad, Mahnoor Fatima and Al-Halah, Ziad},
  title     = {How Would It Sound? {Material}-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12232--12241},
}
SplatTalk: 3D VQA with Gaussian Splatting: Anh Thai,

Songyou Peng,

Kyle Genova,

Leonidas Guibas,

Thomas Funkhouser; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thai_2025_ICCV,
  author    = {Thai, Anh and Peng, Songyou and Genova, Kyle and Guibas, Leonidas and Funkhouser, Thomas},
  title     = {{SplatTalk}: {3D} {VQA} with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4712--4721},
}
LocalDyGS: Multi-view Global Dynamic Scene Modeling via Adaptive Local Implicit Feature Decoupling: Jiahao Wu,

Rui Peng,

Jianbo Jiao,

Jiayu Yang,

Luyang Tang,

Kaiqiang Xiong,

Jie Liang,

Jinbo Yan,

Runling Liu,

Ronggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiahao and Peng, Rui and Jiao, Jianbo and Yang, Jiayu and Tang, Luyang and Xiong, Kaiqiang and Liang, Jie and Yan, Jinbo and Liu, Runling and Wang, Ronggang},
  title     = {{LocalDyGS}: Multi-view Global Dynamic Scene Modeling via Adaptive Local Implicit Feature Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9519--9529},
}
Teeth Reconstruction and Performance Capture Using a Phone Camera: Weixi Zheng,

Jingwang Ling,

Zhibo Wang,

Quan Wang,

Feng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Weixi and Ling, Jingwang and Wang, Zhibo and Wang, Quan and Xu, Feng},
  title     = {Teeth Reconstruction and Performance Capture Using a Phone Camera},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9998--10008},
}
Knowledge Distillation for Learned Image Compression: Yunuo Chen,

Zezheng Lyu,

Bing He,

Ning Cao,

Gang Chen,

Guo Lu,

Wenjun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yunuo and Lyu, Zezheng and He, Bing and Cao, Ning and Chen, Gang and Lu, Guo and Zhang, Wenjun},
  title     = {Knowledge Distillation for Learned Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4996--5006},
}
Training-Free Class Purification for Open-Vocabulary Semantic Segmentation: Qi Chen,

Lingxiao Yang,

Yun Chen,

Nailong Zhao,

Jianhuang Lai,

Jie Shao,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Qi and Yang, Lingxiao and Chen, Yun and Zhao, Nailong and Lai, Jianhuang and Shao, Jie and Xie, Xiaohua},
  title     = {Training-Free Class Purification for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23124--23134},
}
Unsupervised Identification of Protein Compositions and Conformations via Implicit Content-Transformation Disentanglement: Mostofa Rafid Uddin,

Jana Armouti,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Uddin_2025_ICCV,
  author    = {Uddin, Mostofa Rafid and Armouti, Jana and Xu, Min},
  title     = {Unsupervised Identification of Protein Compositions and Conformations via Implicit Content-Transformation Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7483--7493},
}
VIGFace: Virtual Identity Generation for Privacy-Free Face Recognition Dataset: Minsoo Kim,

Min-Cheol Sagong,

Gi Pyo Nam,

Junghyun Cho,

Ig-Jae Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Minsoo and Sagong, Min-Cheol and Nam, Gi Pyo and Cho, Junghyun and Kim, Ig-Jae},
  title     = {{VIGFace}: Virtual Identity Generation for Privacy-Free Face Recognition Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10043--10053},
}
Generalizable Object Re-Identification via Visual In-Context Prompting: Zhizhong Huang,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Zhizhong and Liu, Xiaoming},
  title     = {Generalizable Object Re-Identification via Visual In-Context Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22539--22550},
}
GenHaze: Pioneering Controllable One-Step Realistic Haze Generation for Real-World Dehazing: Sixiang Chen,

Tian Ye,

Yunlong Lin,

Yeying Jin,

Yijun Yang,

Haoyu Chen,

Jianyu Lai,

Song Fei,

Zhaohu Xing,

Fugee Tsung,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Sixiang and Ye, Tian and Lin, Yunlong and Jin, Yeying and Yang, Yijun and Chen, Haoyu and Lai, Jianyu and Fei, Song and Xing, Zhaohu and Tsung, Fugee and Zhu, Lei},
  title     = {{GenHaze}: Pioneering Controllable One-Step Realistic Haze Generation for Real-World Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9194--9205},
}
LLaVA-CoT: Let Vision Language Models Reason Step-by-Step: Guowei Xu,

Peng Jin,

Ziang Wu,

Hao Li,

Yibing Song,

Lichao Sun,

Li Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Guowei and Jin, Peng and Wu, Ziang and Li, Hao and Song, Yibing and Sun, Lichao and Yuan, Li},
  title     = {{LLaVA-CoT}: Let Vision Language Models Reason Step-by-Step},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2087--2098},
}
O-MaMa: Learning Object Mask Matching between Egocentric and Exocentric Views: Lorenzo Mur-Labadia,

Maria Santos-Villafranca,

Jesus Bermudez-Cameo,

Alejandro Perez-Yus,

Ruben Martinez-Cantin,

Jose J. Guerrero; [pdf] [supp]
[bibtex]
@InProceedings{Mur-Labadia_2025_ICCV,
  author    = {Mur-Labadia, Lorenzo and Santos-Villafranca, Maria and Bermudez-Cameo, Jesus and Perez-Yus, Alejandro and Martinez-Cantin, Ruben and Guerrero, Jose J.},
  title     = {{O-MaMa}: Learning Object Mask Matching between Egocentric and Exocentric Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6892--6903},
}
Rethinking the Embodied Gap in Vision-and-Language Navigation: A Holistic Study of Physical and Visual Disparities: Liuyi Wang,

Xinyuan Xia,

Hui Zhao,

Hanqing Wang,

Tai Wang,

Yilun Chen,

Chengju Liu,

Qijun Chen,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Liuyi and Xia, Xinyuan and Zhao, Hui and Wang, Hanqing and Wang, Tai and Chen, Yilun and Liu, Chengju and Chen, Qijun and Pang, Jiangmiao},
  title     = {Rethinking the Embodied Gap in Vision-and-Language Navigation: A Holistic Study of Physical and Visual Disparities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9455--9465},
}
Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation: Dong Zhao,

Qi Zang,

Shuang Wang,

Nicu Sebe,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Dong and Zang, Qi and Wang, Shuang and Sebe, Nicu and Zhong, Zhun},
  title     = {{Pseudo-SD}: Pseudo Controlled {Stable Diffusion} for Semi-Supervised and Cross-Domain Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22393--22403},
}
MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling: Yingyue Li,

Bencheng Liao,

Wenyu Liu,

Xinggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yingyue and Liao, Bencheng and Liu, Wenyu and Wang, Xinggang},
  title     = {{MaTVLM}: Hybrid {Mamba}-Transformer for Efficient Vision-Language Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20878--20888},
}
ETCH: Generalizing Body Fitting to Clothed Humans via Equivariant Tightness: Boqian Li,

Haiwen Feng,

Zeyu Cai,

Michael J. Black,

Yuliang Xiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Boqian and Feng, Haiwen and Cai, Zeyu and Black, Michael J. and Xiu, Yuliang},
  title     = {{ETCH}: Generalizing Body Fitting to Clothed Humans via Equivariant Tightness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8264--8274},
}
SVIP: Semantically Contextualized Visual Patches for Zero-Shot Learning: Zhi Chen,

Zecheng Zhao,

Jingcai Guo,

Jingjing Li,

Zi Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhi and Zhao, Zecheng and Guo, Jingcai and Li, Jingjing and Huang, Zi},
  title     = {{SVIP}: Semantically Contextualized Visual Patches for Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3346--3356},
}
MamTiff-CAD: Multi-Scale Latent Diffusion with Mamba+ for Complex Parametric Sequence: Liyuan Deng,

Yunpeng Bai,

Yongkang Dai,

Xiaoshui Huang,

Hongping Gan,

Dongshuo Huang,

Hao Jiacheng,

Yilei Shi; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Liyuan and Bai, Yunpeng and Dai, Yongkang and Huang, Xiaoshui and Gan, Hongping and Huang, Dongshuo and Jiacheng, Hao and Shi, Yilei},
  title     = {{MamTiff-CAD}: Multi-Scale Latent Diffusion with {Mamba+} for Complex Parametric Sequence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10517--10526},
}
Forgetting Through Transforming: Enabling Federated Unlearning via Class-Aware Representation Transformation: Qi Guo,

Zhen Tian,

Minghao Yao,

Saiyu Qi,

Yong Qi,

Bingyi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Qi and Tian, Zhen and Yao, Minghao and Qi, Saiyu and Qi, Yong and Liu, Bingyi},
  title     = {Forgetting Through Transforming: Enabling Federated Unlearning via Class-Aware Representation Transformation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1474--1483},
}
ReasonVQA: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering: Duong T. Tran,

Trung-Kien Tran,

Manfred Hauswirth,

Danh Le Phuoc; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Duong T. and Tran, Trung-Kien and Hauswirth, Manfred and Le Phuoc, Danh},
  title     = {{ReasonVQA}: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18793--18803},
}
HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity: Yida Wang,

Xueyang Zhang,

Kun Zhan,

Peng Jia,

Xianpeng Lang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yida and Zhang, Xueyang and Zhan, Kun and Jia, Peng and Lang, Xianpeng},
  title     = {{HiNeuS}: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25746--25755},
}
Rethinking DPO-style Diffusion Aligning Frameworks: Xun Wu,

Shaohan Huang,

Lingjie Jiang,

Furu Wei; [pdf]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Xun and Huang, Shaohan and Jiang, Lingjie and Wei, Furu},
  title     = {Rethinking {DPO-style} Diffusion Aligning Frameworks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18068--18077},
}
VSC: Visual Search Compositional Text-to-Image Diffusion Model: Do Huu Dat,

Nam Hyeon-Woo,

Po-Yuan Mao,

Tae-Hyun Oh; [pdf] [supp]
[bibtex]
@InProceedings{Dat_2025_ICCV,
  author    = {Dat, Do Huu and Hyeon-Woo, Nam and Mao, Po-Yuan and Oh, Tae-Hyun},
  title     = {{VSC}: Visual Search Compositional Text-to-Image Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19153--19162},
}
FRET: Feature Redundancy Elimination for Test Time Adaptation: Linjing You,

Jiabao Lu,

Xiayuan Huang,

Xiangli Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Linjing and Lu, Jiabao and Huang, Xiayuan and Nie, Xiangli},
  title     = {{FRET}: Feature Redundancy Elimination for Test Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2120--2130},
}
From One to More: Contextual Part Latents for 3D Generation: Shaocong Dong,

Lihe Ding,

Xiao Chen,

Yaokun Li,

Yuxin Wang,

Yucheng Wang,

Qi Wang,

Jaehyeok Kim,

Chenjian Gao,

Zhanpeng Huang,

Zibin Wang,

Tianfan Xue,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Shaocong and Ding, Lihe and Chen, Xiao and Li, Yaokun and Wang, Yuxin and Wang, Yucheng and Wang, Qi and Kim, Jaehyeok and Gao, Chenjian and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan and Xu, Dan},
  title     = {From One to More: Contextual Part Latents for {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8230--8240},
}
CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning: Kuniaki Saito,

Donghyun Kim,

Kwanyong Park,

Atsushi Hashimoto,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saito_2025_ICCV,
  author    = {Saito, Kuniaki and Kim, Donghyun and Park, Kwanyong and Hashimoto, Atsushi and Ushiku, Yoshitaka},
  title     = {{CaptionSmiths}: Flexibly Controlling Language Pattern in Image Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19872--19881},
}
CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation: Zhuoyan Luo,

Yinghao Wu,

Tianheng Cheng,

Yong Liu,

Yicheng Xiao,

Hongfa Wang,

Xiao-Ping Zhang,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Zhuoyan and Wu, Yinghao and Cheng, Tianheng and Liu, Yong and Xiao, Yicheng and Wang, Hongfa and Zhang, Xiao-Ping and Yang, Yujiu},
  title     = {{CoHD}: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22685--22694},
}
PersonalVideo: High ID-Fidelity Video Customization without Dynamic and Semantic Degradation: Hengjia Li,

Haonan Qiu,

Shiwei Zhang,

Xiang Wang,

Yujie Wei,

Zekun Li,

Yingya Zhang,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hengjia and Qiu, Haonan and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Li, Zekun and Zhang, Yingya and Wu, Boxi and Cai, Deng},
  title     = {{PersonalVideo}: High {ID}-Fidelity Video Customization without Dynamic and Semantic Degradation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19406--19416},
}
MP-HSIR: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration: Zhehui Wu,

Yong Chen,

Naoto Yokoya,

Wei He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhehui and Chen, Yong and Yokoya, Naoto and He, Wei},
  title     = {{MP-HSIR}: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13009--13020},
}
Learning A Unified Template for Gait Recognition: Panjian Huang,

Saihui Hou,

Junzhou Huang,

Yongzhen Huang; [pdf]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Panjian and Hou, Saihui and Huang, Junzhou and Huang, Yongzhen},
  title     = {Learning A Unified Template for Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12459--12469},
}
LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation: Wei-Jer Chang,

Wei Zhan,

Masayoshi Tomizuka,

Manmohan Chandraker,

Francesco Pittaluga; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Wei-Jer and Zhan, Wei and Tomizuka, Masayoshi and Chandraker, Manmohan and Pittaluga, Francesco},
  title     = {{LANGTRAJ}: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26622--26631},
}
Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model: Daehee Park,

Monu Surana,

Pranav Desai,

Ashish Mehta,

Reuben MV John,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Daehee and Surana, Monu and Desai, Pranav and Mehta, Ashish and John, Reuben MV and Yoon, Kuk-Jin},
  title     = {Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27839--27850},
}
REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder: Yitian Zhang,

Long Mai,

Aniruddha Mahapatra,

David Bourgin,

Yicong Hong,

Jonah Casebeer,

Feng Liu,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yitian and Mai, Long and Mahapatra, Aniruddha and Bourgin, David and Hong, Yicong and Casebeer, Jonah and Liu, Feng and Fu, Yun},
  title     = {{REGEN}: Learning Compact Video Embedding with (Re-)Generative Decoder},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18453--18462},
}
DADet: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection: Hongwei Yu,

Xinlong Ding,

Jiawei Li,

Jinlong Wang,

Yudong Zhang,

Rongquan Wang,

Huimin Ma,

Jiansheng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Hongwei and Ding, Xinlong and Li, Jiawei and Wang, Jinlong and Zhang, Yudong and Wang, Rongquan and Ma, Huimin and Chen, Jiansheng},
  title     = {{DADet}: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17411--17421},
}
CL-Splats: Continual Learning of Gaussian Splatting with Local Optimization: Jan Ackermann,

Jonas Kulhanek,

Shengqu Cai,

Haofei Xu,

Marc Pollefeys,

Gordon Wetzstein,

Leonidas J. Guibas,

Songyou Peng; [pdf] [supp]
[bibtex]
@InProceedings{Ackermann_2025_ICCV,
  author    = {Ackermann, Jan and Kulhanek, Jonas and Cai, Shengqu and Xu, Haofei and Pollefeys, Marc and Wetzstein, Gordon and Guibas, Leonidas J. and Peng, Songyou},
  title     = {{CL-Splats}: Continual Learning of {Gaussian} Splatting with Local Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7808--7817},
}
Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis: Zhongwei Qiu,

Hanqing Chao,

Tiancheng Lin,

Wanxing Chang,

Zijiang Yang,

Wenpei Jiao,

Yixuan Shen,

Yunshuo Zhang,

Yelin Yang,

Wenbin Liu,

Hui Jiang,

Yun Bian,

Ke Yan,

Dakai Jin,

Le Lu; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Zhongwei and Chao, Hanqing and Lin, Tiancheng and Chang, Wanxing and Yang, Zijiang and Jiao, Wenpei and Shen, Yixuan and Zhang, Yunshuo and Yang, Yelin and Liu, Wenbin and Jiang, Hui and Bian, Yun and Yan, Ke and Jin, Dakai and Lu, Le},
  title     = {Bridging Local Inductive Bias and Long-Range Dependencies with {Pixel-Mamba} for End-to-end Whole Slide Image Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22738--22747},
}
Inpaint4Drag: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping: Jingyi Lu,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jingyi and Han, Kai},
  title     = {{Inpaint4Drag}: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18304--18313},
}
Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models: Zerui Tao,

Yuhta Takida,

Naoki Murata,

Qibin Zhao,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Zerui and Takida, Yuhta and Murata, Naoki and Zhao, Qibin and Mitsufuji, Yuki},
  title     = {Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16333--16344},
}
PlaneRAS: Learning Planar Primitives for 3D Plane Recovery: Fang Zhang,

Wenzhao Zheng,

Linqing Zhao,

Zelan Zhu,

Jiwen Lu,

Xiuzhuang Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Fang and Zheng, Wenzhao and Zhao, Linqing and Zhu, Zelan and Lu, Jiwen and Zhou, Xiuzhuang},
  title     = {{PlaneRAS}: Learning Planar Primitives for {3D} Plane Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6882--6891},
}
TARS: Traffic-Aware Radar Scene Flow Estimation: Jialong Wu,

Marco Braun,

Dominic Spata,

Matthias Rottmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jialong and Braun, Marco and Spata, Dominic and Rottmann, Matthias},
  title     = {{TARS}: Traffic-Aware Radar Scene Flow Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26075--26084},
}
Multi-Cache Enhanced Prototype Learning for Test-Time Generalization of Vision-Language Models: Xinyu Chen,

Haotian Zhai,

Can Zhang,

Xiupeng Shi,

Ruirui Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xinyu and Zhai, Haotian and Zhang, Can and Shi, Xiupeng and Li, Ruirui},
  title     = {Multi-Cache Enhanced Prototype Learning for Test-Time Generalization of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2281--2291},
}
MotionFollower: Editing Video Motion via Score-Guided Diffusion: Shuyuan Tu,

Qi Dai,

Zihao Zhang,

Sicheng Xie,

Zhi-Qi Cheng,

Chong Luo,

Xintong Han,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_ICCV,
  author    = {Tu, Shuyuan and Dai, Qi and Zhang, Zihao and Xie, Sicheng and Cheng, Zhi-Qi and Luo, Chong and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{MotionFollower}: Editing Video Motion via Score-Guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12822--12831},
}
Learning Few-Step Diffusion Models by Trajectory Distribution Matching: Yihong Luo,

Tianyang Hu,

Jiacheng Sun,

Yujun Cai,

Jing Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yihong and Hu, Tianyang and Sun, Jiacheng and Cai, Yujun and Tang, Jing},
  title     = {Learning Few-Step Diffusion Models by Trajectory Distribution Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17719--17728},
}
Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion: Songru Yang,

Zhenwei Shi,

Zhengxia Zou; [pdf]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Songru and Shi, Zhenwei and Zou, Zhengxia},
  title     = {Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27563--27574},
}
Certifiably Optimal Anisotropic Rotation Averaging: Carl Olsson,

Yaroslava Lochman,

Johan Malmport,

Christopher Zach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Olsson_2025_ICCV,
  author    = {Olsson, Carl and Lochman, Yaroslava and Malmport, Johan and Zach, Christopher},
  title     = {Certifiably Optimal Anisotropic Rotation Averaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14856--14865},
}
TeRA: Rethinking Text-guided Realistic 3D Avatar Generation: Yanwen Wang,

Yiyu Zhuang,

Jiawei Zhang,

Li Wang,

Yifei Zeng,

Xun Cao,

Xinxin Zuo,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yanwen and Zhuang, Yiyu and Zhang, Jiawei and Wang, Li and Zeng, Yifei and Cao, Xun and Zuo, Xinxin and Zhu, Hao},
  title     = {{TeRA}: Rethinking Text-guided Realistic {3D} Avatar Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10686--10697},
}
Understanding Flatness in Generative Models: Its Role and Benefits: Taehwan Lee,

Kyeongkook Seo,

Jaejun Yoo,

Sung Whan Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Taehwan and Seo, Kyeongkook and Yoo, Jaejun and Yoon, Sung Whan},
  title     = {Understanding Flatness in Generative Models: Its Role and Benefits},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4908--4917},
}
UniGlyph: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis: Yuanrui Wang,

Cong Han,

Yafei Li,

Zhipeng Jin,

Xiawei Li,

SiNan Du,

Wen Tao,

Shuanglong Li,

Yi Yang,

Chun Yuan,

Liu Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuanrui and Han, Cong and Li, Yafei and Jin, Zhipeng and Li, Xiawei and Du, SiNan and Tao, Wen and Li, Shuanglong and Yang, Yi and Yuan, Chun and Lin, Liu},
  title     = {{UniGlyph}: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18335--18344},
}
DCHM: Depth-Consistent Human Modeling for Multiview Detection: Jiahao Ma,

Tianyu Wang,

Miaomiao Liu,

David Ahmedt-Aristizabal,

Chuong Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Jiahao and Wang, Tianyu and Liu, Miaomiao and Ahmedt-Aristizabal, David and Nguyen, Chuong},
  title     = {{DCHM}: Depth-Consistent Human Modeling for Multiview Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7731--7740},
}
ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models: Ke Niu,

Haiyang Yu,

Mengyang Zhao,

Teng Fu,

Siyang Yi,

Wei Lu,

Bin Li,

Xuelin Qian,

Xiangyang Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Niu_2025_ICCV,
  author    = {Niu, Ke and Yu, Haiyang and Zhao, Mengyang and Fu, Teng and Yi, Siyang and Lu, Wei and Li, Bin and Qian, Xuelin and Xue, Xiangyang},
  title     = {{ChatReID}: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24245--24254},
}
TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models: Ziyang Luo,

Nian Liu,

Xuguang Yang,

Salman Khan,

Rao Muhammad Anwer,

Hisham Cholakkal,

Fahad Shahbaz Khan,

Junwei Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Ziyang and Liu, Nian and Yang, Xuguang and Khan, Salman and Anwer, Rao Muhammad and Cholakkal, Hisham and Khan, Fahad Shahbaz and Han, Junwei},
  title     = {{TAViS}: Text-bridged Audio-Visual Segmentation with Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24014--24023},
}
ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation: Haoyu Fu,

Diankun Zhang,

Zongchuang Zhao,

Jianfeng Cui,

Dingkang Liang,

Chong Zhang,

Dingyuan Zhang,

Hongwei Xie,

Bing Wang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Haoyu and Zhang, Diankun and Zhao, Zongchuang and Cui, Jianfeng and Liang, Dingkang and Zhang, Chong and Zhang, Dingyuan and Xie, Hongwei and Wang, Bing and Bai, Xiang},
  title     = {{ORION}: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24823--24834},
}
GeoExplorer: Active Geo-localization with Curiosity-Driven Exploration: Li Mi,

Manon Béchaz,

Zeming Chen,

Antoine Bosselut,

Devis Tuia; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2025_ICCV,
  author    = {Mi, Li and B{\'e}chaz, Manon and Chen, Zeming and Bosselut, Antoine and Tuia, Devis},
  title     = {{GeoExplorer}: Active Geo-localization with Curiosity-Driven Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6122--6131},
}
LaCoOT: Layer Collapse through Optimal Transport: Victor Quétu,

Zhu Liao,

Nour Hezbri,

Fabio Pizzati,

Enzo Tartaglione; [pdf] [supp]
[bibtex]
@InProceedings{Quetu_2025_ICCV,
  author    = {Qu{\'e}tu, Victor and Liao, Zhu and Hezbri, Nour and Pizzati, Fabio and Tartaglione, Enzo},
  title     = {{LaCoOT}: Layer Collapse through Optimal Transport},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20497--20507},
}
Back on Track: Bundle Adjustment for Dynamic Scene Reconstruction: Weirong Chen,

Ganlin Zhang,

Felix Wimbauer,

Rui Wang,

Nikita Araslanov,

Andrea Vedaldi,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Weirong and Zhang, Ganlin and Wimbauer, Felix and Wang, Rui and Araslanov, Nikita and Vedaldi, Andrea and Cremers, Daniel},
  title     = {Back on Track: Bundle Adjustment for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4951--4960}
}
ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations: Tianming Liang,

Kun-Yu Lin,

Chaolei Tan,

Jianguo Zhang,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Tianming and Lin, Kun-Yu and Tan, Chaolei and Zhang, Jianguo and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {{ReferDINO}: Referring Video Object Segmentation with Visual Grounding Foundations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20009--20019}
}
LA-MOTR: End-to-End Multi-Object Tracking by Learnable Association: Peng Wang,

Yongcai Wang,

Hualong Cao,

Wang Chen,

Deying Li; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Peng and Wang, Yongcai and Cao, Hualong and Chen, Wang and Li, Deying},
  title     = {{LA-MOTR}: End-to-End Multi-Object Tracking by Learnable Association},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12438--12448}
}
NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes: Han-Hung Lee,

Qinghong Han,

Angel X. Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Han-Hung and Han, Qinghong and Chang, Angel X.},
  title     = {{NuiScene}: Exploring Efficient Generation of Unbounded Outdoor Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26509--26518}
}
NegRefine: Refining Negative Label-Based Zero-Shot OOD Detection: Amirhossein Ansari,

Ke Wang,

Pulei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ansari_2025_ICCV,
  author    = {Ansari, Amirhossein and Wang, Ke and Xiong, Pulei},
  title     = {{NegRefine}: Refining Negative Label-Based Zero-Shot {OOD} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {573--582}
}
DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection: Aashish Sharma; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2025_ICCV,
  author    = {Sharma, Aashish},
  title     = {{DM-EFS}: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24569--24579}
}
OmniVTON: Training-Free Universal Virtual Try-On: Zhaotong Yang,

Yuhui Li,

Shengfeng He,

Xinzhe Li,

Yangyang Xu,

Junyu Dong,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhaotong and Li, Yuhui and He, Shengfeng and Li, Xinzhe and Xu, Yangyang and Dong, Junyu and Du, Yong},
  title     = {{OmniVTON}: Training-Free Universal Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16702--16711}
}
ACAM-KD: Adaptive and Cooperative Attention Masking for Knowledge Distillation: Qizhen Lan,

Qing Tian; [pdf]
[bibtex]
@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Qizhen and Tian, Qing},
  title     = {{ACAM-KD}: Adaptive and Cooperative Attention Masking for Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3957--3966}
}
InfGen: A Resolution-Agnostic Paradigm for Scalable Image Synthesis: Tao Han,

Wanghan Xu,

Junchao Gong,

Xiaoyu Yue,

Song Guo,

Luping Zhou,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Tao and Xu, Wanghan and Gong, Junchao and Yue, Xiaoyu and Guo, Song and Zhou, Luping and Bai, Lei},
  title     = {{InfGen}: A Resolution-Agnostic Paradigm for Scalable Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17941--17950}
}
PlanGen: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models: Runze He,

Bo Cheng,

Yuhang Ma,

Qingxiang Jia,

Shanyuan Liu,

Ao Ma,

Xiaoyu Wu,

Liebucha Wu,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Runze and Cheng, Bo and Ma, Yuhang and Jia, Qingxiang and Liu, Shanyuan and Ma, Ao and Wu, Xiaoyu and Wu, Liebucha and Leng, Dawei and Yin, Yuhui},
  title     = {{PlanGen}: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18143--18154}
}
AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference: Kai Huang,

Hao Zou,

Bochen Wang,

Ye Xi,

Zhen Xie,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Kai and Zou, Hao and Wang, Bochen and Xi, Ye and Xie, Zhen and Wang, Hao},
  title     = {{AirCache}: Activating Inter-modal Relevancy {KV} Cache Compression for Efficient Large Vision-Language Model Inference},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23958--23967}
}
Completing 3D Partial Assemblies with View-Consistent 2D-3D Correspondence: Weihao Wang,

Yu Lan,

Mingyu You,

Bin He; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weihao and Lan, Yu and You, Mingyu and He, Bin},
  title     = {Completing {3D} Partial Assemblies with View-Consistent {2D-3D} Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7741--7750}
}
Latte: Collaborative Test-Time Adaptation of Vision-Language Models in Federated Learning: Wenxuan Bao,

Ruxi Deng,

Ruizhong Qiu,

Tianxin Wei,

Hanghang Tong,

Jingrui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Wenxuan and Deng, Ruxi and Qiu, Ruizhong and Wei, Tianxin and Tong, Hanghang and He, Jingrui},
  title     = {Latte: Collaborative Test-Time Adaptation of Vision-Language Models in Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {133--143}
}
SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting: Shuaiting Li,

Juncan Deng,

Chengxuan Wang,

Kedong Xu,

Rongtao Deng,

Hong Gu,

Haibin Shen,

Kejie Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shuaiting and Deng, Juncan and Wang, Chengxuan and Xu, Kedong and Deng, Rongtao and Gu, Hong and Shen, Haibin and Huang, Kejie},
  title     = {{SSVQ}: Unleashing the Potential of Vector Quantization with Sign-Splitting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23710--23719}
}
From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations: Anthony Bisulco,

Rahul Ramesh,

Randall Balestriero,

Pratik Chaudhari; [pdf] [supp]
[bibtex]
@InProceedings{Bisulco_2025_ICCV,
  author    = {Bisulco, Anthony and Ramesh, Rahul and Balestriero, Randall and Chaudhari, Pratik},
  title     = {From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16441--16450}
}
SliderSpace: Decomposing the Visual Capabilities of Diffusion Models: Rohit Gandikota,

Zongze Wu,

Richard Zhang,

David Bau,

Eli Shechtman,

Nick Kolkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gandikota_2025_ICCV,
  author    = {Gandikota, Rohit and Wu, Zongze and Zhang, Richard and Bau, David and Shechtman, Eli and Kolkin, Nick},
  title     = {{SliderSpace}: Decomposing the Visual Capabilities of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15994--16003}
}
TrustMark: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images: Tu Bui,

Shruti Agarwal,

John Collomosse; [pdf] [supp]
[bibtex]
@InProceedings{Bui_2025_ICCV,
  author    = {Bui, Tu and Agarwal, Shruti and Collomosse, John},
  title     = {{TrustMark}: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18629--18639}
}
TikZero: Zero-Shot Text-Guided Graphics Program Synthesis: Jonas Belouadi,

Eddy Ilg,

Margret Keuper,

Hideki Tanaka,

Masao Utiyama,

Raj Dabre,

Steffen Eger,

Simone Ponzetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Belouadi_2025_ICCV,
  author    = {Belouadi, Jonas and Ilg, Eddy and Keuper, Margret and Tanaka, Hideki and Utiyama, Masao and Dabre, Raj and Eger, Steffen and Ponzetto, Simone},
  title     = {{TikZero}: Zero-Shot Text-Guided Graphics Program Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17793--17806}
}
DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers: Yuntao Chen,

Yuqi Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuntao and Wang, Yuqi and Zhang, Zhaoxiang},
  title     = {{DrivingGPT}: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26890--26900}
}
One-Shot Knowledge Transfer for Scalable Person Re-Identification: Longhua Li,

Lei Qi,

Xin Geng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Longhua and Qi, Lei and Geng, Xin},
  title     = {One-Shot Knowledge Transfer for Scalable Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {668--677}
}
GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors: Kang Du,

Zhihao Liang,

Yulin Shen,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Kang and Liang, Zhihao and Shen, Yulin and Wang, Zeyu},
  title     = {{GS-ID}: Illumination Decomposition on {Gaussian} Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26220--26229}
}
FreeScale: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion: Haonan Qiu,

Shiwei Zhang,

Yujie Wei,

Ruihang Chu,

Hangjie Yuan,

Xiang Wang,

Yingya Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Haonan and Zhang, Shiwei and Wei, Yujie and Chu, Ruihang and Yuan, Hangjie and Wang, Xiang and Zhang, Yingya and Liu, Ziwei},
  title     = {{FreeScale}: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16893--16903}
}
TokenUnify: Scaling Up Autoregressive Pretraining for Neuron Segmentation: Yinda Chen,

Haoyuan Shi,

Xiaoyu Liu,

Te Shi,

Ruobing Zhang,

Dong Liu,

Zhiwei Xiong,

Feng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yinda and Shi, Haoyuan and Liu, Xiaoyu and Shi, Te and Zhang, Ruobing and Liu, Dong and Xiong, Zhiwei and Wu, Feng},
  title     = {{TokenUnify}: Scaling Up Autoregressive Pretraining for Neuron Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13604--13613}
}
SAS: Segment Any 3D Scene with Integrated 2D Priors: Zhuoyuan Li,

Jiahao Lu,

Jiacheng Deng,

Hanzhi Chang,

Lifan Wu,

Yanzhe Liang,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhuoyuan and Lu, Jiahao and Deng, Jiacheng and Chang, Hanzhi and Wu, Lifan and Liang, Yanzhe and Zhang, Tianzhu},
  title     = {{SAS}: Segment Any {3D} Scene with Integrated {2D} Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8306--8318}
}
OMNI-DC: Highly Robust Depth Completion with Multiresolution Depth Integration: Yiming Zuo,

Willow Yang,

Zeyu Ma,

Jia Deng; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2025_ICCV,
  author    = {Zuo, Yiming and Yang, Willow and Ma, Zeyu and Deng, Jia},
  title     = {{OMNI-DC}: Highly Robust Depth Completion with Multiresolution Depth Integration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9287--9297}
}
4D Gaussian Splatting SLAM: Yanyan Li,

Youxu Fang,

Zunjie Zhu,

Kunyi Li,

Yong Ding,

Federico Tombari; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yanyan and Fang, Youxu and Zhu, Zunjie and Li, Kunyi and Ding, Yong and Tombari, Federico},
  title     = {{4D} {Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25019--25028}
}
PHD: Personalized 3D Human Body Fitting with Point Diffusion: Hsuan-I Ho,

Chen Guo,

Po-Chen Wu,

Ivan Shugurov,

Chengcheng Tang,

Abhay Mittal,

Sizhe An,

Manuel Kaufmann,

Linguang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2025_ICCV,
  author    = {Ho, Hsuan-I and Guo, Chen and Wu, Po-Chen and Shugurov, Ivan and Tang, Chengcheng and Mittal, Abhay and An, Sizhe and Kaufmann, Manuel and Zhang, Linguang},
  title     = {{PHD}: Personalized {3D} Human Body Fitting with Point Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7526--7537}
}
SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering: Byeongjun Park,

Hyojun Go,

Hyelin Nam,

Byung-Hoon Kim,

Hyungjin Chung,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Byeongjun and Go, Hyojun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick},
  title     = {{SteerX}: Creating Any Camera-Free {3D} and {4D} Scenes with Geometric Steering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27326--27337}
}
Hierarchical Material Recognition from Local Appearance: Matthew Beveridge,

Shree K. Nayar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beveridge_2025_ICCV,
  author    = {Beveridge, Matthew and Nayar, Shree K.},
  title     = {Hierarchical Material Recognition from Local Appearance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8165--8176}
}
Unleashing the Temporal Potential of Stereo Event Cameras for Continuous-Time 3D Object Detection: Jae-Young Kang,

Hoonhee Cho,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Jae-Young and Cho, Hoonhee and Yoon, Kuk-Jin},
  title     = {Unleashing the Temporal Potential of Stereo Event Cameras for Continuous-Time {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6869--6881}
}
SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization: Zhentao Tan,

Ben Xue,

Jian Jia,

Junhao Wang,

Wencai Ye,

Shaoyun Shi,

Mingjie Sun,

Wenjin Wu,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhentao and Xue, Ben and Jia, Jian and Wang, Junhao and Ye, Wencai and Shi, Shaoyun and Sun, Mingjie and Wu, Wenjin and Chen, Quan and Jiang, Peng},
  title     = {{SweetTok}: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23541--23550}
}
D-Attn: Decomposed Attention for Large Vision-and-Language Model: Chia-Wen Kuo,

Sijie Zhu,

Fan Chen,

Xiaohui Shen,

Longyin Wen; [pdf] [supp]
[bibtex]
@InProceedings{Kuo_2025_ICCV,
  author    = {Kuo, Chia-Wen and Zhu, Sijie and Chen, Fan and Shen, Xiaohui and Wen, Longyin},
  title     = {{D-Attn}: Decomposed Attention for Large Vision-and-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23935--23944}
}
R1-VL: Learning to Reason with Multimodal Large Language Models via Step-wise Group Relative Policy Optimization: Jingyi Zhang,

Jiaxing Huang,

Huanjin Yao,

Shunyu Liu,

Xikun Zhang,

Shijian Lu,

Dacheng Tao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jingyi and Huang, Jiaxing and Yao, Huanjin and Liu, Shunyu and Zhang, Xikun and Lu, Shijian and Tao, Dacheng},
  title     = {{R1-VL}: Learning to Reason with Multimodal Large Language Models via Step-wise Group Relative Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1859--1869}
}
Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution: Hongjun Wang,

Jiyuan Chen,

Zhengwei Yin,

Xuan Song,

Yinqiang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongjun and Chen, Jiyuan and Yin, Zhengwei and Song, Xuan and Zheng, Yinqiang},
  title     = {Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14152--14161}
}
Rep-MTL: Unleashing the Power of Representation-level Task Saliency for Multi-Task Learning: Zedong Wang,

Siyuan Li,

Dan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zedong and Li, Siyuan and Xu, Dan},
  title     = {{Rep-MTL}: Unleashing the Power of Representation-level Task Saliency for Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3413--3423}
}
TLB-VFI: Temporal-Aware Latent Brownian Bridge Diffusion for Video Frame Interpolation: Zonglin Lyu,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Zonglin and Chen, Chen},
  title     = {{TLB-VFI}: Temporal-Aware Latent {Brownian} Bridge Diffusion for Video Frame Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16260--16269}
}
Gaussian Splatting with Discretized SDF for Relightable Assets: Zuo-Liang Zhu,

Jian Yang,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Zuo-Liang and Yang, Jian and Wang, Beibei},
  title     = {{Gaussian} Splatting with Discretized {SDF} for Relightable Assets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25155--25164}
}
Region-based Cluster Discrimination for Visual Representation Learning: Yin Xie,

Kaicheng Yang,

Xiang An,

Kun Wu,

Yongle Zhao,

Weimo Deng,

Zimin Ran,

Yumeng Wang,

Ziyong Feng,

Roy Miles,

Ismail Elezi,

Jiankang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Yin and Yang, Kaicheng and An, Xiang and Wu, Kun and Zhao, Yongle and Deng, Weimo and Ran, Zimin and Wang, Yumeng and Feng, Ziyong and Miles, Roy and Elezi, Ismail and Deng, Jiankang},
  title     = {Region-based Cluster Discrimination for Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1793--1803}
}
AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering: Michael Steiner,

Thomas Köhler,

Lukas Radl,

Felix Windisch,

Dieter Schmalstieg,

Markus Steinberger; [pdf] [supp]
[bibtex]
@InProceedings{Steiner_2025_ICCV,
  author    = {Steiner, Michael and K{\"o}hler, Thomas and Radl, Lukas and Windisch, Felix and Schmalstieg, Dieter and Steinberger, Markus},
  title     = {{AAA-Gaussians}: Anti-Aliased and Artifact-Free {3D} {Gaussian} Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27650--27659}
}
EvRT-DETR: Latent Space Adaptation of Image Detectors for Event-based Vision: Dmitrii Torbunov,

Yihui Ren,

Animesh Ghose,

Odera Dim,

Yonggang Cui; [pdf] [supp]
[bibtex]
@InProceedings{Torbunov_2025_ICCV,
  author    = {Torbunov, Dmitrii and Ren, Yihui and Ghose, Animesh and Dim, Odera and Cui, Yonggang},
  title     = {{EvRT-DETR}: Latent Space Adaptation of Image Detectors for Event-based Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9812--9821}
}
MeshPad: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing: Haoxuan Li,

Ziya Erkoç,

Lei Li,

Daniele Sirigatti,

Vladislav Rosov,

Angela Dai,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Haoxuan and Erko{\c{c}}, Ziya and Li, Lei and Sirigatti, Daniele and Rosov, Vladislav and Dai, Angela and Nie{\ss}ner, Matthias},
  title     = {{MeshPad}: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16227--16237}
}
ResidualViT for Efficient Temporally Dense Video Encoding: Mattia Soldan,

Fabian Caba Heilbron,

Bernard Ghanem,

Josef Sivic,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soldan_2025_ICCV,
  author    = {Soldan, Mattia and Caba Heilbron, Fabian and Ghanem, Bernard and Sivic, Josef and Russell, Bryan},
  title     = {{ResidualViT} for Efficient Temporally Dense Video Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22305--22315}
}
UniCombine: Unified Multi-Conditional Combination with Diffusion Transformer: Haoxuan Wang,

Jinlong Peng,

Qingdong He,

Hao Yang,

Ying Jin,

Jiafu Wu,

Xiaobin Hu,

Yanjie Pan,

Zhenye Gan,

Mingmin Chi,

Bo Peng,

Yabiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoxuan and Peng, Jinlong and He, Qingdong and Yang, Hao and Jin, Ying and Wu, Jiafu and Hu, Xiaobin and Pan, Yanjie and Gan, Zhenye and Chi, Mingmin and Peng, Bo and Wang, Yabiao},
  title     = {{UniCombine}: Unified Multi-Conditional Combination with Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18325--18334}
}
Easi3R: Estimating Disentangled Motion from DUSt3R Without Training: Xingyu Chen,

Yue Chen,

Yuliang Xiu,

Andreas Geiger,

Anpei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xingyu and Chen, Yue and Xiu, Yuliang and Geiger, Andreas and Chen, Anpei},
  title     = {{Easi3R}: Estimating Disentangled Motion from {DUSt3R} Without Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9158--9168}
}
Addressing Text Embedding Leakage in Diffusion-based Image Editing: Sunung Mun,

Jinhwan Nam,

Sunghyun Cho,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mun_2025_ICCV,
  author    = {Mun, Sunung and Nam, Jinhwan and Cho, Sunghyun and Ok, Jungseul},
  title     = {Addressing Text Embedding Leakage in Diffusion-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16451--16460}
}
CaO2: Rectifying Inconsistencies in Diffusion-Based Dataset Distillation: Haoxuan Wang,

Zhenghao Zhao,

Junyi Wu,

Yuzhang Shang,

Gaowen Liu,

Yan Yan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoxuan and Zhao, Zhenghao and Wu, Junyi and Shang, Yuzhang and Liu, Gaowen and Yan, Yan},
  title     = {{CaO2}: Rectifying Inconsistencies in Diffusion-Based Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4722--4731}
}
HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models: Zhixiang Wei,

Guangting Wang,

Xiaoxiao Ma,

Ke Mei,

Huaian Chen,

Yi Jin,

Fengyun Rao; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Zhixiang and Wang, Guangting and Ma, Xiaoxiao and Mei, Ke and Chen, Huaian and Jin, Yi and Rao, Fengyun},
  title     = {{HQ-CLIP}: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and {CLIP} Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22447--22456}
}
Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios: Chunxiao Li,

Xiaoxiao Wang,

Meiling Li,

Boming Miao,

Peng Sun,

Yunjian Zhang,

Xiangyang Ji,

Yao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Chunxiao and Wang, Xiaoxiao and Li, Meiling and Miao, Boming and Sun, Peng and Zhang, Yunjian and Ji, Xiangyang and Zhu, Yao},
  title     = {Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking {AI-Generated} Image Detection in Challenging Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20379--20389}
}
Progressive Test Time Energy Adaptation for Medical Image Segmentation: Xiaoran Zhang,

Byung-Woo Hong,

Hyoungseob Park,

Daniel H. Pak,

Anne-Marie Rickmann,

Lawrence H. Staib,

James S. Duncan,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiaoran and Hong, Byung-Woo and Park, Hyoungseob and Pak, Daniel H. and Rickmann, Anne-Marie and Staib, Lawrence H. and Duncan, James S. and Wong, Alex},
  title     = {Progressive Test Time Energy Adaptation for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22338--22348}
}
Sequential Gaussian Avatars with Hierarchical Motion Context: Wangze Xu,

Yifan Zhan,

Zhihang Zhong,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wangze and Zhan, Yifan and Zhong, Zhihang and Sun, Xiao},
  title     = {Sequential {Gaussian} Avatars with Hierarchical Motion Context},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13592--13603}
}
Beyond Losses Reweighting: Empowering Multi-Task Learning via the Generalization Perspective: Hoang Phan,

Lam Tran,

Quyen Tran,

Ngoc Tran,

Tuan Truong,

Qi Lei,

Nhat Ho,

Dinh Phung,

Trung Le; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phan_2025_ICCV,
  author    = {Phan, Hoang and Tran, Lam and Tran, Quyen and Tran, Ngoc and Truong, Tuan and Lei, Qi and Ho, Nhat and Phung, Dinh and Le, Trung},
  title     = {Beyond Losses Reweighting: Empowering Multi-Task Learning via the Generalization Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2440--2450}
}
Zero-shot Inexact CAD Model Alignment from a Single Image: Pattaramanee Arsomngern,

Sasikarn Khwanmuang,

Matthias Nießner,

Supasorn Suwajanakorn; [pdf] [supp]
[bibtex]
@InProceedings{Arsomngern_2025_ICCV,
  author    = {Arsomngern, Pattaramanee and Khwanmuang, Sasikarn and Nie{\ss}ner, Matthias and Suwajanakorn, Supasorn},
  title     = {Zero-shot Inexact {CAD} Model Alignment from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6231--6241}
}
Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection: Dat Nguyen,

Marcella Astrid,

Anis Kacem,

Enjie Ghorbel,

Djamila Aouada; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Dat and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
  title     = {Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10786--10796}
}
Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization: Kangle Deng,

Hsueh-Ti Derek Liu,

Yiheng Zhu,

Xiaoxia Sun,

Chong Shang,

Kiran S. Bhat,

Deva Ramanan,

Jun-Yan Zhu,

Maneesh Agrawala,

Tinghui Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Kangle and Liu, Hsueh-Ti Derek and Zhu, Yiheng and Sun, Xiaoxia and Shang, Chong and Bhat, Kiran S. and Ramanan, Deva and Zhu, Jun-Yan and Agrawala, Maneesh and Zhou, Tinghui},
  title     = {Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11685--11696}
}
Cycle-Consistent Learning for Joint Layout-to-Image Generation and Object Detection: Xinhao Cai,

Qiuxia Lai,

Gensheng Pei,

Xiangbo Shu,

Yazhou Yao,

Wenguan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Xinhao and Lai, Qiuxia and Pei, Gensheng and Shu, Xiangbo and Yao, Yazhou and Wang, Wenguan},
  title     = {Cycle-Consistent Learning for Joint Layout-to-Image Generation and Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6797--6807}
}
MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing: Langyu Wang,

Bingke Zhu,

Yingying Chen,

Yiyuan Zhang,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Langyu and Zhu, Bingke and Chen, Yingying and Zhang, Yiyuan and Tang, Ming and Wang, Jinqiao},
  title     = {{MUG}: Pseudo Labeling Augmented Audio-Visual {Mamba} Network for Audio-Visual Video Parsing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20637--20646}
}
LIFT: Latent Implicit Functions for Task- and Data-Agnostic Encoding: Amirhossein Kazerouni,

Soroush Mehraban,

Michael Brudno,

Babak Taati; [pdf] [supp]
[bibtex]
@InProceedings{Kazerouni_2025_ICCV,
  author    = {Kazerouni, Amirhossein and Mehraban, Soroush and Brudno, Michael and Taati, Babak},
  title     = {{LIFT}: Latent Implicit Functions for Task- and Data-Agnostic Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4828--4837},
}
Long-Context State-Space Video World Models: Ryan Po,

Yotam Nitzan,

Richard Zhang,

Berlin Chen,

Tri Dao,

Eli Shechtman,

Gordon Wetzstein,

Xun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Po_2025_ICCV,
  author    = {Po, Ryan and Nitzan, Yotam and Zhang, Richard and Chen, Berlin and Dao, Tri and Shechtman, Eli and Wetzstein, Gordon and Huang, Xun},
  title     = {Long-Context State-Space Video World Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8733--8744},
}
Dataset Distillation via the Wasserstein Metric: Haoyang Liu,

Yijiang Li,

Tiancheng Xing,

Peiran Wang,

Vibhu Dalal,

Luwei Li,

Jingrui He,

Haohan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Haoyang and Li, Yijiang and Xing, Tiancheng and Wang, Peiran and Dalal, Vibhu and Li, Luwei and He, Jingrui and Wang, Haohan},
  title     = {Dataset Distillation via the {Wasserstein} Metric},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1205--1215},
}
IRGPT: Understanding Real-world Infrared Image with Bi-cross-modal Curriculum on Large-scale Benchmark: Zhe Cao,

Jin Zhang,

Ruiheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Zhe and Zhang, Jin and Zhang, Ruiheng},
  title     = {{IRGPT}: Understanding Real-world Infrared Image with Bi-cross-modal Curriculum on Large-scale Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {166--176},
}
Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration: Katie Z Luo,

Minh-Quan Dao,

Zhenzhen Liu,

Mark Campbell,

Wei-Lun Chao,

Kilian Q Weinberger,

Ezio Malis,

Vincent Fremont,

Bharath Hariharan,

Mao Shan,

Stewart Worrall,

Julie Stephany Berrio Perez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Katie Z and Dao, Minh-Quan and Liu, Zhenzhen and Campbell, Mark and Chao, Wei-Lun and Weinberger, Kilian Q and Malis, Ezio and Fremont, Vincent and Hariharan, Bharath and Shan, Mao and Worrall, Stewart and Perez, Julie Stephany Berrio},
  title     = {Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous {LiDAR} {V2X} Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28763--28773},
}
Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions: Yuwen Pan,

Rui Sun,

Wangkai Li,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Yuwen and Sun, Rui and Li, Wangkai and Zhang, Tianzhu},
  title     = {Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13952--13962},
}
OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography: Caoshuo Li,

Zengmao Ding,

Xiaobin Hu,

Bang Li,

Donghao Luo,

AndyPian Wu,

Chaoyang Wang,

Chengjie Wang,

Taisong Jin,

Seven Shu,

Yunsheng Wu,

Yongge Liu,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Caoshuo and Ding, Zengmao and Hu, Xiaobin and Li, Bang and Luo, Donghao and Wu, AndyPian and Wang, Chaoyang and Wang, Chengjie and Jin, Taisong and Shu, Seven and Wu, Yunsheng and Liu, Yongge and Ji, Rongrong},
  title     = {{OracleFusion}: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19893--19902},
}
Streaming VideoLLMs for Real-Time Procedural Video Understanding: Dibyadip Chatterjee,

Edoardo Remelli,

Yale Song,

Bugra Tekin,

Abhay Mittal,

Bharat Bhatnagar,

Necati Cihan Camgoz,

Shreyas Hampali,

Eric Sauser,

Shugao Ma,

Angela Yao,

Fadime Sener; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chatterjee_2025_ICCV,
  author    = {Chatterjee, Dibyadip and Remelli, Edoardo and Song, Yale and Tekin, Bugra and Mittal, Abhay and Bhatnagar, Bharat and Camgoz, Necati Cihan and Hampali, Shreyas and Sauser, Eric and Ma, Shugao and Yao, Angela and Sener, Fadime},
  title     = {Streaming {VideoLLMs} for Real-Time Procedural Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22586--22598},
}
Efficient Visual Place Recognition Through Multimodal Semantic Knowledge Integration: Sitao Zhang,

Hongda Mao,

Qingshuang Chen,

Yelin Kim; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Sitao and Mao, Hongda and Chen, Qingshuang and Kim, Yelin},
  title     = {Efficient Visual Place Recognition Through Multimodal Semantic Knowledge Integration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5601--5610},
}
Rethinking Multi-modal Object Detection from the Perspective of Mono-Modality Feature Learning: Tianyi Zhao,

Boyang Liu,

Yanglei Gao,

Yiming Sun,

Maoxun Yuan,

Xingxing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Tianyi and Liu, Boyang and Gao, Yanglei and Sun, Yiming and Yuan, Maoxun and Wei, Xingxing},
  title     = {Rethinking Multi-modal Object Detection from the Perspective of Mono-Modality Feature Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6364--6373},
}
VEGGIE: Instructional Editing and Reasoning Video Concepts with Grounded Generation: Shoubin Yu,

Difan Liu,

Ziqiao Ma,

Yicong Hong,

Yang Zhou,

Hao Tan,

Joyce Chai,

Mohit Bansal; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Shoubin and Liu, Difan and Ma, Ziqiao and Hong, Yicong and Zhou, Yang and Tan, Hao and Chai, Joyce and Bansal, Mohit},
  title     = {{VEGGIE}: Instructional Editing and Reasoning Video Concepts with Grounded Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15147--15158},
}
Task-Aware Prompt Gradient Projection for Parameter-Efficient Tuning Federated Class-Incremental Learning: Hualong Ke,

Jiangming Shi,

Yachao Zhang,

Fangyong Wang,

Yuan Xie,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Hualong and Shi, Jiangming and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun},
  title     = {Task-Aware Prompt Gradient Projection for Parameter-Efficient Tuning Federated Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2631--2641},
}
Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code: Sitong Wu,

Haoru Tan,

Yukang Chen,

Shaofeng Zhang,

Jingyao Li,

Bei Yu,

Xiaojuan Qi,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Sitong and Tan, Haoru and Chen, Yukang and Zhang, Shaofeng and Li, Jingyao and Yu, Bei and Qi, Xiaojuan and Jia, Jiaya},
  title     = {Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24603--24614},
}
DMesh++: An Efficient Differentiable Mesh for Complex Shapes: Sanghyun Son,

Matheus Gadelha,

Yang Zhou,

Matthew Fisher,

Zexiang Xu,

Yi-Ling Qiao,

Ming C. Lin,

Yi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Son_2025_ICCV,
  author    = {Son, Sanghyun and Gadelha, Matheus and Zhou, Yang and Fisher, Matthew and Xu, Zexiang and Qiao, Yi-Ling and Lin, Ming C. and Zhou, Yi},
  title     = {{DMesh++}: An Efficient Differentiable Mesh for Complex Shapes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26590--26599},
}
From Imitation to Innovation: The Emergence of AI's Unique Artistic Styles and the Challenge of Copyright Protection: Zexi Jia,

Chuanwei Huang,

Yeshuang Zhu,

Hongyan Fei,

Ying Deng,

Zhiqiang Yuan,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Deng, Ying and Yuan, Zhiqiang and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {From Imitation to Innovation: The Emergence of {AI}'s Unique Artistic Styles and the Challenge of Copyright Protection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18980--18989},
}
VPO: Aligning Text-to-Video Generation Models with Prompt Optimization: Jiale Cheng,

Ruiliang Lyu,

Xiaotao Gu,

Xiao Liu,

Jiazheng Xu,

Yida Lu,

Jiayan Teng,

Zhuoyi Yang,

Yuxiao Dong,

Jie Tang,

Hongning Wang,

Minlie Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Jiale and Lyu, Ruiliang and Gu, Xiaotao and Liu, Xiao and Xu, Jiazheng and Lu, Yida and Teng, Jiayan and Yang, Zhuoyi and Dong, Yuxiao and Tang, Jie and Wang, Hongning and Huang, Minlie},
  title     = {{VPO}: Aligning Text-to-Video Generation Models with Prompt Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15636--15645},
}
Intervening in Black Box: Concept Bottleneck Model for Enhancing Human Neural Network Mutual Understanding: Nuoye Xiong,

Anqi Dong,

Ning Wang,

Cong Hua,

Guangming Zhu,

Lin Mei,

Peiyi Shen,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_ICCV,
  author    = {Xiong, Nuoye and Dong, Anqi and Wang, Ning and Hua, Cong and Zhu, Guangming and Mei, Lin and Shen, Peiyi and Zhang, Liang},
  title     = {Intervening in Black Box: Concept Bottleneck Model for Enhancing Human Neural Network Mutual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2836--2845},
}
Versatile Transition Generation with Image-to-Video Diffusion: Zuhao Yang,

Jiahui Zhang,

Yingchen Yu,

Shijian Lu,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zuhao and Zhang, Jiahui and Yu, Yingchen and Lu, Shijian and Bai, Song},
  title     = {Versatile Transition Generation with Image-to-Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16981--16990},
}
SMGDiff: Soccer Motion Generation using Diffusion Probabilistic Models: Hongdi Yang,

Chengyang Li,

Zhenxuan Wu,

Gaozheng Li,

Jingya Wang,

Jingyi Yu,

Zhuo Su,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hongdi and Li, Chengyang and Wu, Zhenxuan and Li, Gaozheng and Wang, Jingya and Yu, Jingyi and Su, Zhuo and Xu, Lan},
  title     = {{SMGDiff}: Soccer Motion Generation using Diffusion Probabilistic Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11807--11817},
}
Extrapolated Urban View Synthesis Benchmark: Xiangyu Han,

Zhen Jia,

Boyi Li,

Yan Wang,

Boris Ivanovic,

Yurong You,

Lingjie Liu,

Yue Wang,

Marco Pavone,

Chen Feng,

Yiming Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Xiangyu and Jia, Zhen and Li, Boyi and Wang, Yan and Ivanovic, Boris and You, Yurong and Liu, Lingjie and Wang, Yue and Pavone, Marco and Feng, Chen and Li, Yiming},
  title     = {Extrapolated Urban View Synthesis Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28718--28728},
}
Emulating Self-attention with Convolution for Efficient Image Super-Resolution: Dongheon Lee,

Seokju Yun,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Dongheon and Yun, Seokju and Ro, Youngmin},
  title     = {Emulating Self-attention with Convolution for Efficient Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24467--24477},
}
CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving: Rui Song,

Chenwei Liang,

Yan Xia,

Walter Zimmer,

Hu Cao,

Holger Caesar,

Andreas Festag,

Alois Knoll; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Rui and Liang, Chenwei and Xia, Yan and Zimmer, Walter and Cao, Hu and Caesar, Holger and Festag, Andreas and Knoll, Alois},
  title     = {{CoDa-4DGS}: Dynamic {Gaussian} Splatting with Context and Deformation Awareness for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28031--28041},
}
RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors: Avinash Paliwal,

Xilong Zhou,

Wei Ye,

Jinhui Xiong,

Rakesh Ranjan,

Nima Khademi Kalantari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paliwal_2025_ICCV,
  author    = {Paliwal, Avinash and Zhou, Xilong and Ye, Wei and Xiong, Jinhui and Ranjan, Rakesh and Kalantari, Nima Khademi},
  title     = {{RI3D}: Few-Shot {Gaussian} Splatting With Repair and Inpainting Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25094--25103},
}
Text Embedding Knows How to Quantize Text-Guided Diffusion Models: Hongjae Lee,

Myungjun Son,

Dongjea Kang,

Seung-Won Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hongjae and Son, Myungjun and Kang, Dongjea and Jung, Seung-Won},
  title     = {Text Embedding Knows How to Quantize Text-Guided Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15426--15436},
}
UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction: Jin Cao,

Hongrui Wu,

Ziyong Feng,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Jin and Wu, Hongrui and Feng, Ziyong and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {{UniVerse}: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27031--27041},
}
Shape of Motion: 4D Reconstruction from a Single Video: Qianqian Wang,

Vickie Ye,

Hang Gao,

Weijia Zeng,

Jake Austin,

Zhengqi Li,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qianqian and Ye, Vickie and Gao, Hang and Zeng, Weijia and Austin, Jake and Li, Zhengqi and Kanazawa, Angjoo},
  title     = {Shape of Motion: {4D} Reconstruction from a Single Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9660--9672},
}
From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos: Chenjian Gao,

Lihe Ding,

Rui Han,

Zhanpeng Huang,

Zibin Wang,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Chenjian and Ding, Lihe and Han, Rui and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan},
  title     = {From Gallery to Wrist: Realistic {3D} Bracelet Insertion in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25712--25721},
}
Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models: In Cho,

Youngbeom Yoo,

Subin Jeon,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, In and Yoo, Youngbeom and Jeon, Subin and Kim, Seon Joo},
  title     = {Representing {3D} Shapes with 64 Latent Vectors for {3D} Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28556--28566},
}
Backdoor Defense via Enhanced Splitting and Trap Isolation: Hongrui Yu,

Lu Qi,

Wanyu Lin,

Jian Chen,

Hailong Sun,

Chengbin Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Hongrui and Qi, Lu and Lin, Wanyu and Chen, Jian and Sun, Hailong and Sun, Chengbin},
  title     = {Backdoor Defense via Enhanced Splitting and Trap Isolation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1708--1717},
}
FedVLA: Federated Vision-Language-Action Learning with Dual Gating Mixture-of-Experts for Robotic Manipulation: Cui Miao,

Tao Chang,

Meihan Wu,

Hongbin Xu,

Chun Li,

Ming Li,

Xiaodong Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Cui and Chang, Tao and Wu, Meihan and Xu, Hongbin and Li, Chun and Li, Ming and Wang, Xiaodong},
  title     = {{FedVLA}: Federated Vision-Language-Action Learning with Dual Gating Mixture-of-Experts for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6904--6913},
}
BokehDiff: Neural Lens Blur with One-Step Diffusion: Chengxuan Zhu,

Qingnan Fan,

Qi Zhang,

Jinwei Chen,

Huaqi Zhang,

Chao Xu,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Chengxuan and Fan, Qingnan and Zhang, Qi and Chen, Jinwei and Zhang, Huaqi and Xu, Chao and Shi, Boxin},
  title     = {{BokehDiff}: Neural Lens Blur with One-Step Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9508--9518},
}
CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation: Leon Sick,

Dominik Engel,

Sebastian Hartwig,

Pedro Hermosilla,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sick_2025_ICCV,
  author    = {Sick, Leon and Engel, Dominik and Hartwig, Sebastian and Hermosilla, Pedro and Ropinski, Timo},
  title     = {{CutS3D}: Cutting Semantics in {3D} for {2D} Unsupervised Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21265--21275},
}
Perspective-Aware Reasoning in Vision-Language Models via Mental Imagery Simulation: Phillip Y. Lee,

Jihyeon Je,

Chanho Park,

Mikaela Angelina Uy,

Leonidas Guibas,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Phillip Y. and Je, Jihyeon and Park, Chanho and Uy, Mikaela Angelina and Guibas, Leonidas and Sung, Minhyuk},
  title     = {Perspective-Aware Reasoning in Vision-Language Models via Mental Imagery Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9241--9251},
}
Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection: Lei Fan,

Junjie Huang,

Donglin Di,

Anyang Su,

Tianyou Song,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Lei and Huang, Junjie and Di, Donglin and Su, Anyang and Song, Tianyou and Pagnucco, Maurice and Song, Yang},
  title     = {Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21419--21428},
}
GUAVA: Generalizable Upper Body 3D Gaussian Avatar: Dongbin Zhang,

Yunfei Liu,

Lijian Lin,

Ye Zhu,

Yang Li,

Minghan Qin,

Yu Li,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Li, Yang and Qin, Minghan and Li, Yu and Wang, Haoqian},
  title     = {{GUAVA}: Generalizable Upper Body {3D} {Gaussian} Avatar},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14205--14217},
}
PASG: A Closed-Loop Framework for Automated Geometric Primitive Extraction and Semantic Anchoring in Robotic Manipulation: Zhihao Zhu,

Yifan Zheng,

Siyu Pan,

Yaohui Jin,

Yao Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Zhihao and Zheng, Yifan and Pan, Siyu and Jin, Yaohui and Mu, Yao},
  title     = {{PASG}: A Closed-Loop Framework for Automated Geometric Primitive Extraction and Semantic Anchoring in Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8950--8960},
}
FastVAR: Linear Visual Autoregressive Modeling via Cached Token Pruning: Hang Guo,

Yawei Li,

Taolin Zhang,

Jiangshan Wang,

Tao Dai,

Shu-Tao Xia,

Luca Benini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Hang and Li, Yawei and Zhang, Taolin and Wang, Jiangshan and Dai, Tao and Xia, Shu-Tao and Benini, Luca},
  title     = {{FastVAR}: Linear Visual Autoregressive Modeling via Cached Token Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19011--19021},
}
Online Generic Event Boundary Detection: Hyungrok Jung,

Daneul Kim,

Seunggyun Lim,

Jeany Son,

Jonghyun Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Hyungrok and Kim, Daneul and Lim, Seunggyun and Son, Jeany and Choi, Jonghyun},
  title     = {Online Generic Event Boundary Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13741--13750},
}
Registration beyond Points: General Affine Subspace Alignment via Geodesic Distance on Grassmann Manifold: Jaeho Shin,

Hyeonjae Gil,

Junwoo Jang,

Maani Ghaffari,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Jaeho and Gil, Hyeonjae and Jang, Junwoo and Ghaffari, Maani and Kim, Ayoung},
  title     = {Registration beyond Points: General Affine Subspace Alignment via Geodesic Distance on {Grassmann} Manifold},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3767--3776},
}
Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion: Enyu Liu,

En Yu,

Sijia Chen,

Wenbing Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Enyu and Yu, En and Chen, Sijia and Tao, Wenbing},
  title     = {Disentangling Instance and Scene Contexts for {3D} Semantic Scene Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26999--27009},
}
World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model: Yupeng Zheng,

Pengxuan Yang,

Zebin Xing,

Qichao Zhang,

Yuhang Zheng,

Yinfeng Gao,

Pengfei Li,

Teng Zhang,

Zhongpu Xia,

Peng Jia,

XianPeng Lang,

Dongbin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Yupeng and Yang, Pengxuan and Xing, Zebin and Zhang, Qichao and Zheng, Yuhang and Gao, Yinfeng and Li, Pengfei and Zhang, Teng and Xia, Zhongpu and Jia, Peng and Lang, XianPeng and Zhao, Dongbin},
  title     = {{World4Drive}: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28632--28642},
}
Online Language Splatting: Saimouli Katragadda,

Cho-Ying Wu,

Yuliang Guo,

Xinyu Huang,

Guoquan Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Katragadda_2025_ICCV,
  author    = {Katragadda, Saimouli and Wu, Cho-Ying and Guo, Yuliang and Huang, Xinyu and Huang, Guoquan and Ren, Liu},
  title     = {Online Language Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25882--25892},
}
Amodal Depth Anything: Amodal Depth Estimation in the Wild: Zhenyu Li,

Mykola Lavreniuk,

Jian Shi,

Shariq Farooq Bhat,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Bhat, Shariq Farooq and Wonka, Peter},
  title     = {Amodal Depth Anything: Amodal Depth Estimation in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9673--9682},
}
Hierarchical Variational Test-Time Prompt Generation for Zero-Shot Generalization: Zhaoyang Wu,

Fang Liu,

Licheng Jiao,

Shuo Li,

Lingling Li,

Xu Liu,

Puhua Chen,

Wenping Ma; [pdf]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhaoyang and Liu, Fang and Jiao, Licheng and Li, Shuo and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping},
  title     = {Hierarchical Variational Test-Time Prompt Generation for Zero-Shot Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2325--2335},
}
VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning: Jinglei Zhang,

Yuanfan Guo,

Rolandos Alexandros Potamias,

Jiankang Deng,

Hang Xu,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jinglei and Guo, Yuanfan and Potamias, Rolandos Alexandros and Deng, Jiankang and Xu, Hang and Ma, Chao},
  title     = {{VTimeCoT}: Thinking by Drawing for Video Temporal Grounding and Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24203--24213},
}
RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment: Wanting Zhang,

Zhenhui Ding,

Guilian Chen,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wanting and Ding, Zhenhui and Chen, Guilian and Wu, Huisi and Qin, Jing},
  title     = {{RA-BUSSeg}: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21689--21698},
}
Factorized Learning for Temporally Grounded Video-Language Models: Wenzheng Zeng,

Difei Gao,

Mike Zheng Shou,

Hwee Tou Ng; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Wenzheng and Gao, Difei and Shou, Mike Zheng and Ng, Hwee Tou},
  title     = {Factorized Learning for Temporally Grounded Video-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20683--20693},
}
Decouple to Reconstruct: High Quality UHD Restoration via Active Feature Disentanglement and Reversible Fusion: Yidi Liu,

Dong Li,

Yuxin Ma,

Jie Huang,

Wenlong Zhang,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yidi and Li, Dong and Ma, Yuxin and Huang, Jie and Zhang, Wenlong and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Decouple to Reconstruct: High Quality {UHD} Restoration via Active Feature Disentanglement and Reversible Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11622--11631},
}
Generating Multi-Image Synthetic Data for Text-to-Image Customization: Nupur Kumari,

Xi Yin,

Jun-Yan Zhu,

Ishan Misra,

Samaneh Azadi; [pdf] [arXiv]
[bibtex]
@InProceedings{Kumari_2025_ICCV,
  author    = {Kumari, Nupur and Yin, Xi and Zhu, Jun-Yan and Misra, Ishan and Azadi, Samaneh},
  title     = {Generating Multi-Image Synthetic Data for Text-to-Image Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16524--16534},
}
WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation: Jiajia Li,

Huisi Wu,

Jing Qin; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiajia and Wu, Huisi and Qin, Jing},
  title     = {{WeaveSeg}: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21984--21993},
}
Towards Long-Horizon Vision-Language-Action System: Reasoning, Acting and Memory: Daixun Li,

Yusi Zhang,

Mingxiang Cao,

Donglai Liu,

Weiying Xie,

Tianlin Hui,

Lunkai Lin,

Zhiqiang Xie,

Yunsong Li; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Daixun and Zhang, Yusi and Cao, Mingxiang and Liu, Donglai and Xie, Weiying and Hui, Tianlin and Lin, Lunkai and Xie, Zhiqiang and Li, Yunsong},
  title     = {Towards Long-Horizon Vision-Language-Action System: Reasoning, Acting and Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6839--6848},
}
MR-FIQA: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation: Fu-Zhao Ou,

Chongyi Li,

Shiqi Wang,

Sam Kwong; [pdf] [supp]
[bibtex]
@InProceedings{Ou_2025_ICCV,
  author    = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {{MR-FIQA}: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12915--12925},
}
IDFace: Face Template Protection for Efficient and Secure Identification: Sunpill Kim,

Seunghun Paik,

Chanwoo Hwang,

Dongsoo Kim,

Junbum Shin,

Jae Hong Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Sunpill and Paik, Seunghun and Hwang, Chanwoo and Kim, Dongsoo and Shin, Junbum and Seo, Jae Hong},
  title     = {{IDFace}: Face Template Protection for Efficient and Secure Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13995--14005},
}
A Real-world Display Inverse Rendering Dataset: Seokjun Choi,

Hoon-Gyu Chung,

Yujin Jeon,

Giljoo Nam,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Seokjun and Chung, Hoon-Gyu and Jeon, Yujin and Nam, Giljoo and Baek, Seung-Hwan},
  title     = {A Real-world Display Inverse Rendering Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25272--25283},
}
Lyra: An Efficient and Speech-Centric Framework for Omni-Cognition: Zhisheng Zhong,

Chengyao Wang,

Yuqi Liu,

Senqiao Yang,

Longxiang Tang,

Yuechen Zhang,

Jingyao Li,

Tianyuan Qu,

Yanwei Li,

Yukang Chen,

Shaozuo Yu,

Sitong Wu,

Eric Lo,

Shu Liu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Zhisheng and Wang, Chengyao and Liu, Yuqi and Yang, Senqiao and Tang, Longxiang and Zhang, Yuechen and Li, Jingyao and Qu, Tianyuan and Li, Yanwei and Chen, Yukang and Yu, Shaozuo and Wu, Sitong and Lo, Eric and Liu, Shu and Jia, Jiaya},
  title     = {{Lyra}: An Efficient and Speech-Centric Framework for Omni-Cognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3694--3704},
}
MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition: Maksim Golyadkin,

Valeria Rubanova,

Aleksandr Utkov,

Dmitry Nikolotov,

Ilya Makarov; [pdf]
[bibtex]
@InProceedings{Golyadkin_2025_ICCV,
  author    = {Golyadkin, Maksim and Rubanova, Valeria and Utkov, Aleksandr and Nikolotov, Dmitry and Makarov, Ilya},
  title     = {{MEH}: A Multi-Style Dataset and Toolkit for Advancing {Egyptian} Hieroglyph Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24488--24496},
}
D2ST-Adapter: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition: Wenjie Pei,

Qizhong Tan,

Guangming Lu,

Jiandong Tian,

Jun Yu; [pdf] [supp]
[bibtex]
@InProceedings{Pei_2025_ICCV,
  author    = {Pei, Wenjie and Tan, Qizhong and Lu, Guangming and Tian, Jiandong and Yu, Jun},
  title     = {{D2ST-Adapter}: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11317--11326},
}
Synchronization of Multiple Videos: Avihai Naaman,

Ron Shapira Weber,

Oren Freifeld; [pdf] [supp]
[bibtex]
@InProceedings{Naaman_2025_ICCV,
  author    = {Naaman, Avihai and Shapira Weber, Ron and Freifeld, Oren},
  title     = {Synchronization of Multiple Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12514--12523},
}
Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction: Binjian Xie,

Pengju Zhang,

Hao Wei,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Binjian and Zhang, Pengju and Wei, Hao and Wu, Yihong},
  title     = {{Hi-Gaussian}: Hierarchical {Gaussians} under Normalized Spherical Projection for Single-View {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28664--28673},
}
MotionAgent: Fine-grained Controllable Video Generation via Motion Field Agent: Xinyao Liao,

Xianfang Zeng,

Liao Wang,

Gang Yu,

Guosheng Lin,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Xinyao and Zeng, Xianfang and Wang, Liao and Yu, Gang and Lin, Guosheng and Zhang, Chi},
  title     = {{MotionAgent}: Fine-grained Controllable Video Generation via Motion Field Agent},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11305--11316},
}
The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer: Weixian Lei,

Jiacong Wang,

Haochen Wang,

Xiangtai Li,

Jun Hao Liew,

Jiashi Feng,

Zilong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Weixian and Wang, Jiacong and Wang, Haochen and Li, Xiangtai and Liew, Jun Hao and Feng, Jiashi and Huang, Zilong},
  title     = {The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20758--20769},
}
PacGDC: Label-Efficient Generalizable Depth Completion with Projection Ambiguity and Consistency: Haotian Wang,

Aoran Xiao,

Xiaoqin Zhang,

Meng Yang,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haotian and Xiao, Aoran and Zhang, Xiaoqin and Yang, Meng and Lu, Shijian},
  title     = {{PacGDC}: Label-Efficient Generalizable Depth Completion with Projection Ambiguity and Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7709--7720},
}
GSV3D: Gaussian Splatting-based Geometric Distillation with Stable Video Diffusion for Single-Image 3D Object Generation: Ye Tao,

Jiawei Zhang,

Yahao Shi,

Dongqing Zou,

Bin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Ye and Zhang, Jiawei and Shi, Yahao and Zou, Dongqing and Zhou, Bin},
  title     = {{GSV3D}: {Gaussian} Splatting-based Geometric Distillation with Stable Video Diffusion for Single-Image {3D} Object Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7751--7760},
}
OpenAnimals: Revisiting Person Re-Identification for Animals Towards Better Generalization: Saihui Hou,

Panjian Huang,

Zengbin Wang,

Yuan Liu,

Zeyu Li,

Man Zhang,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Saihui and Huang, Panjian and Wang, Zengbin and Liu, Yuan and Li, Zeyu and Zhang, Man and Huang, Yongzhen},
  title     = {{OpenAnimals}: Revisiting Person Re-Identification for Animals Towards Better Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14369--14379},
}
VSRM: A Robust Mamba-Based Framework for Video Super-Resolution: Dinh Phu Tran,

Dao Duy Hung,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Dinh Phu and Hung, Dao Duy and Kim, Daeyoung},
  title     = {{VSRM}: A Robust {Mamba}-Based Framework for Video Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14711--14721},
}
End-to-End Multi-Modal Diffusion Mamba: Chunhao Lu,

Qiang Lu,

Meichen Dong,

Jake Luo; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chunhao and Lu, Qiang and Dong, Meichen and Luo, Jake},
  title     = {End-to-End Multi-Modal Diffusion {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20529--20540},
}
Guiding Diffusion-Based Articulated Object Generation by Partial Point Cloud Alignment and Physical Plausibility Constraints: Jens U. Kreber,

Joerg Stueckler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kreber_2025_ICCV,
  author    = {Kreber, Jens U. and Stueckler, Joerg},
  title     = {Guiding Diffusion-Based Articulated Object Generation by Partial Point Cloud Alignment and Physical Plausibility Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3206--3214},
}
PoseSyn: Synthesizing Diverse 3D Pose Data from In-the-Wild 2D Data: ChangHee Yang,

Hyeonseop Song,

Seokhun Choi,

Seungwoo Lee,

Jaechul Kim,

Hoseok Do; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, ChangHee and Song, Hyeonseop and Choi, Seokhun and Lee, Seungwoo and Kim, Jaechul and Do, Hoseok},
  title     = {{PoseSyn}: Synthesizing Diverse {3D} Pose Data from In-the-Wild {2D} Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5611--5621},
}
Backdoor Attacks on Neural Networks via One-Bit Flip: Xiang Li,

Lannan Luo,

Qiang Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiang and Luo, Lannan and Zeng, Qiang},
  title     = {Backdoor Attacks on Neural Networks via One-Bit Flip},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4328--4338},
}
Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision: Yuting He,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Yuting and Li, Shuo},
  title     = {Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19827--19837},
}
Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge: Yanqi Li,

Jianwei Niu,

Tao Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yanqi and Niu, Jianwei and Ren, Tao},
  title     = {Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22110--22119},
}
EEdit: Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing: Zexuan Yan,

Yue Ma,

Chang Zou,

Wenteng Chen,

Qifeng Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zexuan and Ma, Yue and Zou, Chang and Chen, Wenteng and Chen, Qifeng and Zhang, Linfeng},
  title     = {{EEdit}: Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17474--17484},
}
Enhancing Adversarial Transferability by Balancing Exploration and Exploitation with Gradient-Guided Sampling: Zenghao Niu,

Weicheng Xie,

Siyang Song,

Zitong Yu,

Feng Liu,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Niu_2025_ICCV,
  author    = {Niu, Zenghao and Xie, Weicheng and Song, Siyang and Yu, Zitong and Liu, Feng and Shen, Linlin},
  title     = {Enhancing Adversarial Transferability by Balancing Exploration and Exploitation with Gradient-Guided Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3885--3894},
}
SuperEdit: Rectifying and Facilitating Supervision for Instruction-Based Image Editing: Ming Li,

Xin Gu,

Fan Chen,

Xiaoying Xing,

Longyin Wen,

Chen Chen,

Sijie Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ming and Gu, Xin and Chen, Fan and Xing, Xiaoying and Wen, Longyin and Chen, Chen and Zhu, Sijie},
  title     = {{SuperEdit}: Rectifying and Facilitating Supervision for Instruction-Based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19206--19215},
}
HPSv3: Towards Wide-Spectrum Human Preference Score: Yuhang Ma,

Xiaoshi Wu,

Keqiang Sun,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Yuhang and Wu, Xiaoshi and Sun, Keqiang and Li, Hongsheng},
  title     = {{HPSv3}: Towards Wide-Spectrum Human Preference Score},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15086--15095},
}
NullSwap: Proactive Identity Cloaking Against Deepfake Face Swapping: Tianyi Wang,

Shuaicheng Niu,

Harry Cheng,

Xiao Zhang,

Yinglong Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tianyi and Niu, Shuaicheng and Cheng, Harry and Zhang, Xiao and Wang, Yinglong},
  title     = {{NullSwap}: Proactive Identity Cloaking Against Deepfake Face Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9945--9954},
}
SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration: Jongsuk Kim,

Jaeyoung Lee,

Gyojin Han,

Dong-Jae Lee,

Minki Jeong,

Junmo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jongsuk and Lee, Jaeyoung and Han, Gyojin and Lee, Dong-Jae and Jeong, Minki and Kim, Junmo},
  title     = {{SynAD}: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25197--25206},
}
Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging: Moslem Yazdanpanah,

Ali Bahri,

Mehrdad Noori,

Sahar Dastani,

Gustavo Adolfo Vargas Hakim,

David Osowiechi,

Ismail Ben Ayed,

Christian Desrosiers; [pdf] [supp]
[bibtex]
@InProceedings{Yazdanpanah_2025_ICCV,
  author    = {Yazdanpanah, Moslem and Bahri, Ali and Noori, Mehrdad and Dastani, Sahar and Vargas Hakim, Gustavo Adolfo and Osowiechi, David and Ben Ayed, Ismail and Desrosiers, Christian},
  title     = {{Purge-Gate}: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27640--27649},
}
External Knowledge Injection for CLIP-Based Class-Incremental Learning: Da-Wei Zhou,

Kai-Wen Li,

Jingyi Ning,

Han-Jia Ye,

Lijun Zhang,

De-Chuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Da-Wei and Li, Kai-Wen and Ning, Jingyi and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan},
  title     = {External Knowledge Injection for {CLIP}-Based Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3314--3325},
}
GeoProg3D: Compositional Visual Reasoning for City-Scale 3D Language Fields: Shunsuke Yasuki,

Taiki Miyanishi,

Nakamasa Inoue,

Shuhei Kurita,

Koya Sakamoto,

Daichi Azuma,

Masato Taki,

Yutaka Matsuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yasuki_2025_ICCV,
  author    = {Yasuki, Shunsuke and Miyanishi, Taiki and Inoue, Nakamasa and Kurita, Shuhei and Sakamoto, Koya and Azuma, Daichi and Taki, Masato and Matsuo, Yutaka},
  title     = {{GeoProg3D}: Compositional Visual Reasoning for City-Scale {3D} Language Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9737--9748},
}
Geometry Distributions: Biao Zhang,

Jing Ren,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Biao and Ren, Jing and Wonka, Peter},
  title     = {Geometry Distributions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1495--1505},
}
PVChat: Personalized Video Chat with One-Shot Learning: Yufei Shi,

Weilong Yan,

Gang Xu,

Yumeng Li,

Yucheng Chen,

Zhenxi Li,

Fei Yu,

Ming Li,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yufei and Yan, Weilong and Xu, Gang and Li, Yumeng and Chen, Yucheng and Li, Zhenxi and Yu, Fei and Li, Ming and Yeo, Si Yong},
  title     = {{PVChat}: Personalized Video Chat with One-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23321--23331},
}
VertexRegen: Mesh Generation with Continuous Level of Detail: Xiang Zhang,

Yawar Siddiqui,

Armen Avetisyan,

Chris Xie,

Jakob Engel,

Henry Howard-Jenkins; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiang and Siddiqui, Yawar and Avetisyan, Armen and Xie, Chris and Engel, Jakob and Howard-Jenkins, Henry},
  title     = {{VertexRegen}: Mesh Generation with Continuous Level of Detail},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12570--12580},
}
Low-Light Image Enhancement Using Event-Based Illumination Estimation: Lei Sun,

Yuhan Bao,

Jiajun Zhai,

Jingyun Liang,

Yulun Zhang,

Kaiwei Wang,

Danda Pani Paudel,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Lei and Bao, Yuhan and Zhai, Jiajun and Liang, Jingyun and Zhang, Yulun and Wang, Kaiwei and Paudel, Danda Pani and Van Gool, Luc},
  title     = {Low-Light Image Enhancement Using Event-Based Illumination Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6667--6677},
}
FontAnimate: High Quality Few-shot Font Generation via Animating Font Transfer Process: Bin Fu,

Zixuan Wang,

Kainan Yan,

Shitian Zhao,

Qi Qin,

Jie Wen,

Junjun He,

Peng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Bin and Wang, Zixuan and Yan, Kainan and Zhao, Shitian and Qin, Qi and Wen, Jie and He, Junjun and Gao, Peng},
  title     = {{FontAnimate}: High Quality Few-shot Font Generation via Animating Font Transfer Process},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16015--16025},
}
A Good Teacher Adapts Their Knowledge for Distillation: Chengyao Qian,

Trung Le,

Mehrtash Harandi; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Chengyao and Le, Trung and Harandi, Mehrtash},
  title     = {A Good Teacher Adapts Their Knowledge for Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1239--1248},
}
GenFlowRL: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning: Kelin Yu,

Sheng Zhang,

Harshit Soora,

Furong Huang,

Heng Huang,

Pratap Tokekar,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Kelin and Zhang, Sheng and Soora, Harshit and Huang, Furong and Huang, Heng and Tokekar, Pratap and Gao, Ruohan},
  title     = {{GenFlowRL}: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13183--13192},
}
MAVFlow: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot AV2AV Multilingual Translation: Sungwoo Cho,

Jeongsoo Choi,

Sungnyun Kim,

Se-Young Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Sungwoo and Choi, Jeongsoo and Kim, Sungnyun and Yun, Se-Young},
  title     = {{MAVFlow}: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot {AV2AV} Multilingual Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13151--13161},
}
Video Color Grading via Look-Up Table Generation: Seunghyun Shin,

Dongmin Shin,

Jisu Shin,

Hae-Gon Jeon,

Joon-Young Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Seunghyun and Shin, Dongmin and Shin, Jisu and Jeon, Hae-Gon and Lee, Joon-Young},
  title     = {Video Color Grading via Look-Up Table Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19141--19152},
}
NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement: Yang Yang,

Dongni Mao,

Hiroaki Santo,

Yasuyuki Matsushita,

Fumio Okura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yang and Mao, Dongni and Santo, Hiroaki and Matsushita, Yasuyuki and Okura, Fumio},
  title     = {{NeuraLeaf}: Neural Parametric Leaf Models with Shape and Deformation Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28167--28176},
}
DC-AR: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer: Yecheng Wu,

Han Cai,

Junyu Chen,

Zhuoyang Zhang,

Enze Xie,

Jincheng Yu,

Junsong Chen,

Jinyi Hu,

Yao Lu,

Song Han; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yecheng and Cai, Han and Chen, Junyu and Zhang, Zhuoyang and Xie, Enze and Yu, Jincheng and Chen, Junsong and Hu, Jinyi and Lu, Yao and Han, Song},
  title     = {{DC-AR}: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18034--18045},
}
BVINet: Unlocking Blind Video Inpainting with Zero Annotations: Zhiliang Wu,

Kerui Chen,

Kun Li,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhiliang and Chen, Kerui and Li, Kun and Fan, Hehe and Yang, Yi},
  title     = {{BVINet}: Unlocking Blind Video Inpainting with Zero Annotations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14017--14027},
}
Semi-supervised Deep Transfer for Regression without Domain Alignment: Mainak Biswas,

Ambedkar Dukkipati,

Devarajan Sridharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Biswas_2025_ICCV,
  author    = {Biswas, Mainak and Dukkipati, Ambedkar and Sridharan, Devarajan},
  title     = {Semi-supervised Deep Transfer for Regression without Domain Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {827--836},
}
Evading Data Provenance in Deep Neural Networks: Hongyu Zhu,

Sichu Liang,

Wenwen Wang,

Zhuomeng Zhang,

Fangqi Li,

Shi-Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Hongyu and Liang, Sichu and Wang, Wenwen and Zhang, Zhuomeng and Li, Fangqi and Wang, Shi-Lin},
  title     = {Evading Data Provenance in Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1249--1260},
}
Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers: Weiming Ren,

Wentao Ma,

Huan Yang,

Cong Wei,

Ge Zhang,

Wenhu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Weiming and Ma, Wentao and Yang, Huan and Wei, Cong and Zhang, Ge and Chen, Wenhu},
  title     = {{Vamba}: Understanding Hour-Long Videos with Hybrid {Mamba}-Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21197--21208},
}
Manual-PA: Learning 3D Part Assembly from Instruction Diagrams: Jiahao Zhang,

Anoop Cherian,

Cristian Rodriguez,

Weijian Deng,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiahao and Cherian, Anoop and Rodriguez, Cristian and Deng, Weijian and Gould, Stephen},
  title     = {{Manual-PA}: Learning {3D} Part Assembly from Instruction Diagrams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6304--6314},
}
RMultiplex200K: Toward Reliable Multimodal Process Supervision for Visual Language Models on Telecommunications: Sijia Chen,

Bin Song; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Sijia and Song, Bin},
  title     = {{RMultiplex200K}: Toward Reliable Multimodal Process Supervision for Visual Language Models on Telecommunications},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1686--1696},
}
Video-T1: Test-time Scaling for Video Generation: Fangfu Liu,

Hanyang Wang,

Yimo Cai,

Kaiyan Zhang,

Xiaohang Zhan,

Yueqi Duan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Fangfu and Wang, Hanyang and Cai, Yimo and Zhang, Kaiyan and Zhan, Xiaohang and Duan, Yueqi},
  title     = {{Video-T1}: Test-time Scaling for Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18671--18681},
}
MemoryTalker: Personalized Speech-Driven 3D Facial Animation via Audio-Guided Stylization: Hyung Kyu Kim,

Sangmin Lee,

Hak Gu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyung Kyu and Lee, Sangmin and Kim, Hak Gu},
  title     = {{MemoryTalker}: Personalized Speech-Driven {3D} Facial Animation via Audio-Guided Stylization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11241--11251},
}
Text2Outfit: Controllable Outfit Generation with Multimodal Language Models: Yuanhao Zhai,

Yen-Liang Lin,

Minxu Peng,

Larry S. Davis,

Ashwin Chandramouli,

Junsong Yuan,

David Doermann; [pdf] [supp]
[bibtex]
@InProceedings{Zhai_2025_ICCV,
  author    = {Zhai, Yuanhao and Lin, Yen-Liang and Peng, Minxu and Davis, Larry S. and Chandramouli, Ashwin and Yuan, Junsong and Doermann, David},
  title     = {{Text2Outfit}: Controllable Outfit Generation with Multimodal Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16165--16174},
}
From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment: Yucheng Suo,

Fan Ma,

Linchao Zhu,

Tianyi Wang,

Fengyun Rao,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Suo_2025_ICCV,
  author    = {Suo, Yucheng and Ma, Fan and Zhu, Linchao and Wang, Tianyi and Rao, Fengyun and Yang, Yi},
  title     = {From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23243--23255},
}
CA2C: A Prior-Knowledge-Free Approach for Robust Label Noise Learning via Asymmetric Co-learning and Co-training: Mengmeng Sheng,

Zeren Sun,

Tianfei Zhou,

Xiangbo Shu,

Jinshan Pan,

Yazhou Yao; [pdf]
[bibtex]
@InProceedings{Sheng_2025_ICCV,
  author    = {Sheng, Mengmeng and Sun, Zeren and Zhou, Tianfei and Shu, Xiangbo and Pan, Jinshan and Yao, Yazhou},
  title     = {{CA2C}: A Prior-Knowledge-Free Approach for Robust Label Noise Learning via Asymmetric Co-learning and Co-training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {901--911},
}
HairCUP: Hair Compositional Universal Prior for 3D Gaussian Avatars: Byungjun Kim,

Shunsuke Saito,

Giljoo Nam,

Tomas Simon,

Jason Saragih,

Hanbyul Joo,

Junxuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Byungjun and Saito, Shunsuke and Nam, Giljoo and Simon, Tomas and Saragih, Jason and Joo, Hanbyul and Li, Junxuan},
  title     = {{HairCUP}: Hair Compositional Universal Prior for {3D} {Gaussian} Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9966--9976},
}
Prototype-based Contrastive Learning with Stage-wise Progressive Augmentation for Self-Supervised Fine-Grained Learning: Baofeng Tan,

Xiu-Shen Wei,

Lin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Baofeng and Wei, Xiu-Shen and Zhao, Lin},
  title     = {Prototype-based Contrastive Learning with Stage-wise Progressive Augmentation for Self-Supervised Fine-Grained Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4125--4134},
}
4DSegStreamer: Streaming 4D Panoptic Segmentation via Dual Threads: Ling Liu,

Jun Tian,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ling and Tian, Jun and Yi, Li},
  title     = {{4DSegStreamer}: Streaming {4D} Panoptic Segmentation via Dual Threads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7089--7098},
}
GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting: Yusen Xie,

Zhenmin Huang,

Jin Wu,

Jun Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Yusen and Huang, Zhenmin and Wu, Jin and Ma, Jun},
  title     = {{GS-LIVM}: Real-Time Photo-Realistic {LiDAR}-Inertial-Visual Mapping with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26869--26878},
}
Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking: Qiangqiang Wu,

Yi Yu,

Chenqi Kong,

Ziquan Liu,

Jia Wan,

Haoliang Li,

Alex C. Kot,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qiangqiang and Yu, Yi and Kong, Chenqi and Liu, Ziquan and Wan, Jia and Li, Haoliang and Kot, Alex C. and Chan, Antoni B.},
  title     = {Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11110--11121},
}
Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction: Youming Deng,

Wenqi Xian,

Guandao Yang,

Leonidas Guibas,

Gordon Wetzstein,

Steve Marschner,

Paul Debevec; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Youming and Xian, Wenqi and Yang, Guandao and Guibas, Leonidas and Wetzstein, Gordon and Marschner, Steve and Debevec, Paul},
  title     = {Self-Calibrating {Gaussian} Splatting for Large Field-of-View Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25124--25133},
}
TWIST & SCOUT: Grounding Multimodal LLM-Experts by Forget-Free Tuning: Aritra Bhowmik,

Mohammad Mahdi Derakhshani,

Dennis Koelma,

Yuki M. Asano,

Martin R. Oswald,

Cees G. M. Snoek; [pdf] [arXiv]
[bibtex]
@InProceedings{Bhowmik_2025_ICCV,
  author    = {Bhowmik, Aritra and Derakhshani, Mohammad Mahdi and Koelma, Dennis and Asano, Yuki M. and Oswald, Martin R. and Snoek, Cees G. M.},
  title     = {{TWIST} \& {SCOUT}: Grounding Multimodal {LLM}-Experts by Forget-Free Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1359--1368},
}
Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability: Seungju Yoo,

Hyuk Kwon,

Joong-Won Hwang,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2025_ICCV,
  author    = {Yoo, Seungju and Kwon, Hyuk and Hwang, Joong-Won and Lee, Kibok},
  title     = {Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19764--19773},
}
VSP: Diagnosing the Dual Challenges of Perception and Reasoning in Spatial Planning Tasks for MLLMs: Qiucheng Wu,

Handong Zhao,

Michael Saxon,

Trung Bui,

William Yang Wang,

Yang Zhang,

Shiyu Chang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qiucheng and Zhao, Handong and Saxon, Michael and Bui, Trung and Wang, William Yang and Zhang, Yang and Chang, Shiyu},
  title     = {{VSP}: Diagnosing the Dual Challenges of Perception and Reasoning in Spatial Planning Tasks for {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2270--2280},
}
Instance-Level Video Depth in Groups Beyond Occlusions: Yuan Liang,

Yang Zhou,

Ziming Sun,

Tianyi Xiang,

Guiqing Li,

Shengfeng He; [pdf]
[bibtex]
@InProceedings{Liang_2025_ICCV,
    author    = {Liang, Yuan and Zhou, Yang and Sun, Ziming and Xiang, Tianyi and Li, Guiqing and He, Shengfeng},
    title     = {Instance-Level Video Depth in Groups Beyond Occlusions},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {7581--7591},
}
ODP-Bench: Benchmarking Out-of-Distribution Performance Prediction: Han Yu,

Kehan Li,

Dongbai Li,

Yue He,

Xingxuan Zhang,

Peng Cui; [pdf]
[bibtex]
@InProceedings{Yu_2025_ICCV,
    author    = {Yu, Han and Li, Kehan and Li, Dongbai and He, Yue and Zhang, Xingxuan and Cui, Peng},
    title     = {{ODP-Bench}: Benchmarking Out-of-Distribution Performance Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {1846--1858},
}
Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation: Yong Liu,

Song-Li Wu,

Sule Bai,

Jiahao Wang,

Yitong Wang,

Yansong Tang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yong and Wu, Song-Li and Bai, Sule and Wang, Jiahao and Wang, Yitong and Tang, Yansong},
    title     = {Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22664--22674},
}
Sibai: A Few-Shot Meta-Classifier for Poisoning Detection in Federated Learning: Melanie Götz,

Torsten Krauß,

Alexandra Dmitrienko; [pdf] [supp]
[bibtex]
@InProceedings{Gotz_2025_ICCV,
    author    = {G{\"o}tz, Melanie and Krau{\ss}, Torsten and Dmitrienko, Alexandra},
    title     = {Sibai: A Few-Shot Meta-Classifier for Poisoning Detection in Federated Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3787--3797},
}
StyleKeeper: Prevent Content Leakage using Negative Visual Query Guidance: Jaeseok Jeong,

Junho Kim,

Gayoung Lee,

Yunjey Choi,

Youngjung Uh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
    author    = {Jeong, Jaeseok and Kim, Junho and Lee, Gayoung and Choi, Yunjey and Uh, Youngjung},
    title     = {{StyleKeeper}: Prevent Content Leakage using Negative Visual Query Guidance},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {15760--15769},
}
Generative Modeling of Shape-Dependent Self-Contact Human Poses: Takehiko Ohkawa,

Jihyun Lee,

Shunsuke Saito,

Jason Saragih,

Fabian Prada,

Yichen Xu,

Shoou-I Yu,

Ryosuke Furuta,

Yoichi Sato,

Takaaki Shiratori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ohkawa_2025_ICCV,
    author    = {Ohkawa, Takehiko and Lee, Jihyun and Saito, Shunsuke and Saragih, Jason and Prada, Fabian and Xu, Yichen and Yu, Shoou-I and Furuta, Ryosuke and Sato, Yoichi and Shiratori, Takaaki},
    title     = {Generative Modeling of Shape-Dependent Self-Contact Human Poses},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {5426--5436},
}
Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation: Zheng Gao,

Jifei Song,

Zhensong Zhang,

Jiankang Deng,

Ioannis Patras; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Zheng and Song, Jifei and Zhang, Zhensong and Deng, Jiankang and Patras, Ioannis},
    title     = {Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19195--19205},
}
DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup: Zhen Qu,

Xian Tao,

Xinyi Gong,

ShiChen Qu,

Xiaopei Zhang,

Xingang Wang,

Fei Shen,

Zhengtao Zhang,

Mukesh Prasad,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
    author    = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Zhang, Xiaopei and Wang, Xingang and Shen, Fei and Zhang, Zhengtao and Prasad, Mukesh and Ding, Guiguang},
    title     = {{DictAS}: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20519--20528},
}
DAP-MAE: Domain-Adaptive Point Cloud Masked Autoencoder for Effective Cross-Domain Learning: Ziqi Gao,

Qiufu Li,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Ziqi and Li, Qiufu and Shen, Linlin},
    title     = {{DAP-MAE}: Domain-Adaptive Point Cloud Masked Autoencoder for Effective Cross-Domain Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3488--3498},
}
6DOPE-GS: Online 6D Object Pose Estimation using Gaussian Splatting: Yufeng Jin,

Vignesh Prasad,

Snehal Jauhri,

Mathias Franzius,

Georgia Chalvatzaki; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
    author    = {Jin, Yufeng and Prasad, Vignesh and Jauhri, Snehal and Franzius, Mathias and Chalvatzaki, Georgia},
    title     = {{6DOPE-GS}: Online {6D} Object Pose Estimation using {Gaussian} Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {8032--8043},
}
Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts: Maoxian Wan,

Kaige Li,

Qichuan Geng,

Weimin Shi,

Zhong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wan_2025_ICCV,
    author    = {Wan, Maoxian and Li, Kaige and Geng, Qichuan and Shi, Weimin and Zhou, Zhong},
    title     = {Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24113--24122},
}
Faster and Better 3D Splatting via Group Training: Chengbo Wang,

Guozheng Ma,

Yifei Xue,

Yizhen Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Chengbo and Ma, Guozheng and Xue, Yifei and Lao, Yizhen},
    title     = {Faster and Better {3D} Splatting via Group Training},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27968--27977},
}
PLMP - Point-Line Minimal Problems for Projective SfM: Kim Kiehn,

Albin Ahlbäck,

Kathlén Kohn; [pdf] [supp]
[bibtex]
@InProceedings{Kiehn_2025_ICCV,
    author    = {Kiehn, Kim and Ahlb{\"a}ck, Albin and Kohn, Kathl{\'e}n},
    title     = {{PLMP} - Point-Line Minimal Problems for Projective {SfM}},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {8558--8567},
}
Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow: Yingfan Ma,

Bohan An,

Ao Shen,

Mingzhi Yuan,

Minghong Duan,

Manning Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Yingfan and An, Bohan and Shen, Ao and Yuan, Mingzhi and Duan, Minghong and Wang, Manning},
    title     = {{Flow-MIL}: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23561--23570},
}
3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views: Xiaobiao Du,

Yida Wang,

Haiyang Sun,

Zhuojie Wu,

Hongwei Sheng,

Shuyun Wang,

Jiaying Ying,

Ming Lu,

Tianqing Zhu,

Kun Zhan,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
    author    = {Du, Xiaobiao and Wang, Yida and Sun, Haiyang and Wu, Zhuojie and Sheng, Hongwei and Wang, Shuyun and Ying, Jiaying and Lu, Ming and Zhu, Tianqing and Zhan, Kun and Yu, Xin},
    title     = {{3DRealCar}: An In-the-wild {RGB-D} Car Dataset with 360-degree Views},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26488--26498},
}
Enhancing Transferability of Targeted Adversarial Examples via Inverse Target Gradient Competition and Spatial Distance Stretching: Zhankai Li,

Weiping Wang,

Jie Li,

Shigeng Zhang,

Yunan Hu,

Song Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Zhankai and Wang, Weiping and Li, Jie and Zhang, Shigeng and Hu, Yunan and Guo, Song},
    title     = {Enhancing Transferability of Targeted Adversarial Examples via Inverse Target Gradient Competition and Spatial Distance Stretching},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3716--3725},
}
MotionShot: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation: Yanchen Liu,

Yanan Sun,

Zhening Xing,

Junyao Gao,

Kai Chen,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yanchen and Sun, Yanan and Xing, Zhening and Gao, Junyao and Chen, Kai and Pei, Wenjie},
    title     = {{MotionShot}: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {11861--11871},
}
STIV: Scalable Text and Image Conditioned Video Generation: Zongyu Lin,

Wei Liu,

Chen Chen,

Jiasen Lu,

Wenze Hu,

Tsu-Jui Fu,

Jesse Allardice,

Zhengfeng Lai,

Liangchen Song,

Bowen Zhang,

Cha Chen,

Yiran Fei,

Lezhi Li,

Yinfei Yang,

Yizhou Sun,

Kai-Wei Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
    author    = {Lin, Zongyu and Liu, Wei and Chen, Chen and Lu, Jiasen and Hu, Wenze and Fu, Tsu-Jui and Allardice, Jesse and Lai, Zhengfeng and Song, Liangchen and Zhang, Bowen and Chen, Cha and Fei, Yiran and Li, Lezhi and Yang, Yinfei and Sun, Yizhou and Chang, Kai-Wei},
    title     = {{STIV}: Scalable Text and Image Conditioned Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {16249--16259},
}
Repurposing 2D Diffusion Models with Gaussian Atlas for 3D Generation: Tiange Xiang,

Kai Li,

Chengjiang Long,

Christian Häne,

Peihong Guo,

Scott Delp,

Ehsan Adeli,

Li Fei-Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
    author    = {Xiang, Tiange and Li, Kai and Long, Chengjiang and H{\"a}ne, Christian and Guo, Peihong and Delp, Scott and Adeli, Ehsan and Fei-Fei, Li},
    title     = {Repurposing {2D} Diffusion Models with {Gaussian} Atlas for {3D} Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {16492--16502},
}
Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks: Bhishma Dedhia,

David Bourgin,

Krishna Kumar Singh,

Yuheng Li,

Yan Kang,

Zhan Xu,

Niraj K. Jha,

Yuchen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dedhia_2025_ICCV,
    author    = {Dedhia, Bhishma and Bourgin, David and Singh, Krishna Kumar and Li, Yuheng and Kang, Yan and Xu, Zhan and Jha, Niraj K. and Liu, Yuchen},
    title     = {Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {15385--15394},
}
The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning: Xinyang Zhou,

Fanyue Wei,

Lixin Duan,

Angela Yao,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Xinyang and Wei, Fanyue and Duan, Lixin and Yao, Angela and Li, Wen},
    title     = {The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20981--20990},
}
Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction: Dat Nguyen Cong,

Hieu Tran Bao,

Tung Hoang-Thanh; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2025_ICCV,
    author    = {Cong, Dat Nguyen and Bao, Hieu Tran and Hoang-Thanh, Tung},
    title     = {Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {18531--18541},
}
A Conditional Probability Framework for Compositional Zero-shot Learning: Peng Wu,

Qiuxia Lai,

Hao Fang,

Guo-Sen Xie,

Yilong Yin,

Xiankai Lu,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
    author    = {Wu, Peng and Lai, Qiuxia and Fang, Hao and Xie, Guo-Sen and Yin, Yilong and Lu, Xiankai and Wang, Wenguan},
    title     = {A Conditional Probability Framework for Compositional Zero-shot Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3673--3683},
}
IAP: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization: Subrat Kishore Dutta,

Xiao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutta_2025_ICCV,
    author    = {Dutta, Subrat Kishore and Zhang, Xiao},
    title     = {{IAP}: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {14766--14775},
}
Advancing Textual Prompt Learning with Anchored Attributes: Zheng Li,

Yibing Song,

Ming-Ming Cheng,

Xiang Li,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Zheng and Song, Yibing and Cheng, Ming-Ming and Li, Xiang and Yang, Jian},
    title     = {Advancing Textual Prompt Learning with Anchored Attributes},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3618--3627},
}
Event-based Visual Vibrometry: Xinyu Zhou,

Peiqi Duan,

Yeliduosi Xiaokaiti,

Chao Xu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Xinyu and Duan, Peiqi and Xiaokaiti, Yeliduosi and Xu, Chao and Shi, Boxin},
    title     = {Event-based Visual Vibrometry},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24666--24676},
}
Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation: Haochen Zhao,

Jianwei Niu,

Xuefeng Liu,

Xiaozheng Xie,

Li Kuang,

Haotian Yang,

Bin Dai,

Hui Meng,

Yong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
    author    = {Zhao, Haochen and Niu, Jianwei and Liu, Xuefeng and Xie, Xiaozheng and Kuang, Li and Yang, Haotian and Dai, Bin and Meng, Hui and Wang, Yong},
    title     = {Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21832--21842},
}
Online Reasoning Video Segmentation with Just-in-Time Digital Twins: Yiqing Shen,

Bohan Liu,

Chenjia Li,

Lalithkumar Seenivasan,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
    author    = {Shen, Yiqing and Liu, Bohan and Li, Chenjia and Seenivasan, Lalithkumar and Unberath, Mathias},
    title     = {Online Reasoning Video Segmentation with Just-in-Time Digital Twins},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24698--24706},
}
Robust Multi-View Learning via Representation Fusion of Sample-Level Attention and Alignment of Simulated Perturbation: Jie Xu,

Na Zhao,

Gang Niu,

Masashi Sugiyama,

Xiaofeng Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
    author    = {Xu, Jie and Zhao, Na and Niu, Gang and Sugiyama, Masashi and Zhu, Xiaofeng},
    title     = {Robust Multi-View Learning via Representation Fusion of Sample-Level Attention and Alignment of Simulated Perturbation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {4232--4241},
}
InterSyn: Interleaved Learning for Dynamic Motion Synthesis in the Wild: Yiyi Ma,

Yuanzhi Liang,

Xiu Li,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Yiyi and Liang, Yuanzhi and Li, Xiu and Zhang, Chi and Li, Xuelong},
    title     = {{InterSyn}: Interleaved Learning for Dynamic Motion Synthesis in the Wild},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {12832--12841},
}
SegmentDreamer: Towards High-fidelity Text-to-3D Synthesis with Segmented Consistency Trajectory Distillation: Jiahao Zhu,

Zixuan Chen,

Guangcong Wang,

Xiaohua Xie,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
    author    = {Zhu, Jiahao and Chen, Zixuan and Wang, Guangcong and Xie, Xiaohua and Zhou, Yi},
    title     = {{SegmentDreamer}: Towards High-fidelity Text-to-{3D} Synthesis with Segmented Consistency Trajectory Distillation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {15864--15874},
}
InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior: Minghao Wen,

Shengjie Wu,

Kangkan Wang,

Dong Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
    author    = {Wen, Minghao and Wu, Shengjie and Wang, Kangkan and Liang, Dong},
    title     = {{InterGSEdit}: Interactive {3D} {Gaussian} Splatting Editing with {3D} Geometry-Consistent Attention Prior},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26136--26145},
}
Fine-Grained Evaluation of Large Vision-Language Models in Autonomous Driving: Yue Li,

Meng Tian,

Zhenyu Lin,

Jiangtong Zhu,

Dechang Zhu,

Haiqiang Liu,

Yueyi Zhang,

Zhiwei Xiong,

Xinhai Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yue and Tian, Meng and Lin, Zhenyu and Zhu, Jiangtong and Zhu, Dechang and Liu, Haiqiang and Zhang, Yueyi and Xiong, Zhiwei and Zhao, Xinhai},
    title     = {Fine-Grained Evaluation of Large Vision-Language Models in Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {9431--9442},
}
MaTe: Images Are All You Need for Material Transfer via Diffusion Transformer: Nisha Huang,

Henglin Liu,

Yizhou Lin,

Kaer Huang,

Chubin Chen,

Jie Guo,

Tong-yee Lee,

Xiu Li; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
    author    = {Huang, Nisha and Liu, Henglin and Lin, Yizhou and Huang, Kaer and Chen, Chubin and Guo, Jie and Lee, Tong-yee and Li, Xiu},
    title     = {{MaTe}: Images Are All You Need for Material Transfer via Diffusion Transformer},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {15117--15126},
}
Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data: Xidan Zhang,

Yihan Zhuang,

Qian Guo,

Haodong Yang,

Xuelin Qian,

Gong Cheng,

Junwei Han,

Zhongling Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Xidan and Zhuang, Yihan and Guo, Qian and Yang, Haodong and Qian, Xuelin and Cheng, Gong and Han, Junwei and Huang, Zhongling},
    title     = {{Ph-GAN}: Physics-Inspired {GAN} for Generating {SAR} Images Under Limited Data},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {29075--29085},
}
Interpretable point cloud classification using multiple instance learning: Matt De Vries,

Reed Naidoo,

Olga Fourkioti,

Lucas G. Dent,

Nathan Curry,

Chris Dunsby,

Chris Bakal; [pdf] [supp]
[bibtex]
@InProceedings{De_Vries_2025_ICCV,
    author    = {De Vries, Matt and Naidoo, Reed and Fourkioti, Olga and Dent, Lucas G. and Curry, Nathan and Dunsby, Chris and Bakal, Chris},
    title     = {Interpretable point cloud classification using multiple instance learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22209--22220},
}
Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation: Lujun Li,

Cheng Lin,

Dezhi Li,

You-Liang Huang,

Wei Li,

Tianyu Wu,

Jie Zou,

Wei Xue,

Sirui Han,

Yike Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Lujun and Lin, Cheng and Li, Dezhi and Huang, You-Liang and Li, Wei and Wu, Tianyu and Zou, Jie and Xue, Wei and Han, Sirui and Guo, Yike},
    title     = {Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22252--22262},
}
CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization: Yihang Liu,

Ying Wen,

Longzhen Yang,

Lianghua He,

Heng Tao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yihang and Wen, Ying and Yang, Longzhen and He, Lianghua and Shen, Heng Tao},
    title     = {{CoSMIC}: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23051--23062},
}
STAR: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution: Rui Xie,

Yinhong Liu,

Penghao Zhou,

Chen Zhao,

Jun Zhou,

Kai Zhang,

Zhenyu Zhang,

Jian Yang,

Zhenheng Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
    author    = {Xie, Rui and Liu, Yinhong and Zhou, Penghao and Zhao, Chen and Zhou, Jun and Zhang, Kai and Zhang, Zhenyu and Yang, Jian and Yang, Zhenheng and Tai, Ying},
    title     = {{STAR}: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {17108--17118},
}
SCAN: Bootstrapping Contrastive Pre-training for Data Efficiency: Yangyang Guo,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
    author    = {Guo, Yangyang and Kankanhalli, Mohan},
    title     = {{SCAN}: Bootstrapping Contrastive Pre-training for Data Efficiency},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {3662--3672},
}
Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation: Qin Zhou,

Guoyan Liang,

Xindi Li,

Jingyuan Chen,

Zhe Wang,

Chang Yao,

Sai Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Qin and Liang, Guoyan and Li, Xindi and Chen, Jingyuan and Wang, Zhe and Yao, Chang and Wu, Sai},
    title     = {Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22529--22538},
}
Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights: Junhao Zheng,

Jiahao Sun,

Chenhao Lin,

Zhengyu Zhao,

Chen Ma,

Chong Zhang,

Cong Wang,

Qian Wang,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
    author    = {Zheng, Junhao and Sun, Jiahao and Lin, Chenhao and Zhao, Zhengyu and Ma, Chen and Zhang, Chong and Wang, Cong and Wang, Qian and Shen, Chao},
    title     = {Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23476--23486},
}
V.I.P. : Iterative Online Preference Distillation for Efficient Video Diffusion Models: Jisoo Kim,

Wooseok Seo,

Junwan Kim,

Seungho Park,

Sooyeon Park,

Youngjae Yu; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
    author    = {Kim, Jisoo and Seo, Wooseok and Kim, Junwan and Park, Seungho and Park, Sooyeon and Yu, Youngjae},
    title     = {{V.I.P.} : Iterative Online Preference Distillation for Efficient Video Diffusion Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {17235--17245},
}
Diffusion-based Source-biased Model for Single Domain Generalized Object Detection: Han Jiang,

Wenfei Yang,

Tianzhu Zhang,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
    author    = {Jiang, Han and Yang, Wenfei and Zhang, Tianzhu and Zhang, Yongdong},
    title     = {Diffusion-based Source-biased Model for Single Domain Generalized Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {1548--1557},
}
Domain Generalizable Portrait Style Transfer: Xinbo Wang,

Wenju Xu,

Qing Zhang,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Xinbo and Xu, Wenju and Zhang, Qing and Zheng, Wei-Shi},
    title     = {Domain Generalizable Portrait Style Transfer},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {15802--15811},
}
4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding: Wenxuan Zhu,

Bing Li,

Cheng Zheng,

Jinjie Mai,

Jun Chen,

Letian Jiang,

Abdullah Hamdi,

Sara Rojas Martinez,

Chia-Wen Lin,

Mohamed Elhoseiny,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
    author    = {Zhu, Wenxuan and Li, Bing and Zheng, Cheng and Mai, Jinjie and Chen, Jun and Jiang, Letian and Hamdi, Abdullah and Martinez, Sara Rojas and Lin, Chia-Wen and Elhoseiny, Mohamed and Ghanem, Bernard},
    title     = {{4D-Bench}: Benchmarking Multi-modal Large Language Models for {4D} Object Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21129--21143},
}
How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?: Yujian Lee,

Peng Gao,

Yongqi Xu,

Wentao Fan; [pdf]
[bibtex]
@InProceedings{Lee_2025_ICCV,
    author    = {Lee, Yujian and Gao, Peng and Xu, Yongqi and Fan, Wentao},
    title     = {How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23342--23352},
}
Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration: Shihao Zhou,

Dayu Li,

Jinshan Pan,

Juncheng Zhou,

Jinglei Shi,

Jufeng Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Shihao and Li, Dayu and Pan, Jinshan and Zhou, Juncheng and Shi, Jinglei and Yang, Jufeng},
    title     = {Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {12307--12317},
}
Epona: Autoregressive Diffusion World Model for Autonomous Driving: Kaiwen Zhang,

Zhenyu Tang,

Xiaotao Hu,

Xingang Pan,

Xiaoyang Guo,

Yuan Liu,

Jingwei Huang,

Li Yuan,

Qian Zhang,

Xiao-Xiao Long,

Xun Cao,

Wei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Kaiwen and Tang, Zhenyu and Hu, Xiaotao and Pan, Xingang and Guo, Xiaoyang and Liu, Yuan and Huang, Jingwei and Yuan, Li and Zhang, Qian and Long, Xiao-Xiao and Cao, Xun and Yin, Wei},
    title     = {Epona: Autoregressive Diffusion World Model for Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27220--27230},
}
Diffusion-based 3D Hand Motion Recovery with Intuitive Physics: Yufei Zhang,

Zijun Cui,

Jeffrey O. Kephart,

Qiang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Yufei and Cui, Zijun and Kephart, Jeffrey O. and Ji, Qiang},
    title     = {Diffusion-based {3D} Hand Motion Recovery with Intuitive Physics},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {7306--7317},
}
Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves: Alexander C. Ogren,

Berthy T. Feng,

Jihoon Ahn,

Katherine L. Bouman,

Chiara Daraio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ogren_2025_ICCV,
    author    = {Ogren, Alexander C. and Feng, Berthy T. and Ahn, Jihoon and Bouman, Katherine L. and Daraio, Chiara},
    title     = {Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26446--26455},
}
Leveraging Spatial Invariance to Boost Adversarial Transferability: Zihan Zhou,

Li Li,

Yanli Ren,

Chuan Qin,

Guorui Feng; [pdf]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Zihan and Li, Li and Ren, Yanli and Qin, Chuan and Feng, Guorui},
    title     = {Leveraging Spatial Invariance to Boost Adversarial Transferability},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {1423--1432},
}
MoSiC: Optimal-Transport Motion Trajectory for Dense Self-Supervised Learning: Mohammadreza Salehi,

Shashanka Venkataramanan,

Ioana Simion,

Efstratios Gavves,

Cees G. M. Snoek,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Salehi_2025_ICCV,
    author    = {Salehi, Mohammadreza and Venkataramanan, Shashanka and Simion, Ioana and Gavves, Efstratios and Snoek, Cees G. M. and Asano, Yuki M.},
    title     = {{MoSiC}: Optimal-Transport Motion Trajectory for Dense Self-Supervised Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {6541--6551},
}
GDKVM: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule: Rui Wang,

Yimu Sun,

Jingxing Guo,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Rui and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing},
    title     = {{GDKVM}: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {12191--12200},
}
ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis: Onkar Susladkar,

Gayatri Deshmukh,

Yalcin Tur,

Gorkem Durak,

Ulas Bagci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2025_ICCV,
  author    = {Susladkar, Onkar and Deshmukh, Gayatri and Tur, Yalcin and Durak, Gorkem and Bagci, Ulas},
  title     = {{ViCTr}: Vital Consistency Transfer for Pathology Aware Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22772--22782},
}
DanceEditor: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions: Hengyuan Zhang,

Zhe Li,

Xingqun Qi,

Mengze Li,

Muyi Sun,

Siye Wang,

Man Zhang,

Sirui Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hengyuan and Li, Zhe and Qi, Xingqun and Li, Mengze and Sun, Muyi and Wang, Siye and Zhang, Man and Han, Sirui},
  title     = {{DanceEditor}: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12158--12168},
}
When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection: Hongliang Zhou,

Yongxiang Liu,

Canyu Mo,

Weijie Li,

Bowen Peng,

Li Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hongliang and Liu, Yongxiang and Mo, Canyu and Li, Weijie and Peng, Bowen and Liu, Li},
  title     = {When Pixel Difference Patterns Meet {ViT}: {PiDiViT} for Few-Shot Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24309--24318},
}
AnimalClue: Recognizing Animals by their Traces: Risa Shinoda,

Nakamasa Inoue,

Iro Laina,

Christian Rupprecht,

Hirokatsu Kataoka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinoda_2025_ICCV,
  author    = {Shinoda, Risa and Inoue, Nakamasa and Laina, Iro and Rupprecht, Christian and Kataoka, Hirokatsu},
  title     = {{AnimalClue}: Recognizing Animals by their Traces},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14776--14786},
}
From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning: Yuhui Zeng,

Haoxiang Wu,

Wenjie Nie,

Guangyao Chen,

Xiawu Zheng,

Yunhang Shen,

Jun Peng,

Yonghong Tian,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Yuhui and Wu, Haoxiang and Nie, Wenjie and Chen, Guangyao and Zheng, Xiawu and Shen, Yunhang and Peng, Jun and Tian, Yonghong and Ji, Rongrong},
  title     = {From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via {LLM}-guided Symbolic Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24380--24391},
}
Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models: Wei Suo,

Ji Ma,

Mengyang Sun,

Lin Yuanbo Wu,

Peng Wang,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Suo_2025_ICCV,
  author    = {Suo, Wei and Ma, Ji and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning},
  title     = {Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20247--20256},
}
Discontinuity-aware Normal Integration for Generic Central Camera Models: Francesco Milano,

Manuel López-Antequera,

Naina Dhingra,

Roland Siegwart,

Robert Thiel; [pdf] [supp]
[bibtex]
@InProceedings{Milano_2025_ICCV,
  author    = {Milano, Francesco and L{\'o}pez-Antequera, Manuel and Dhingra, Naina and Siegwart, Roland and Thiel, Robert},
  title     = {Discontinuity-aware Normal Integration for Generic Central Camera Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26026--26034},
}
LazyMAR: Accelerating Masked Autoregressive Models via Feature Caching: Feihong Yan,

Qingyan Wei,

Jiayi Tang,

Jiajun Li,

Yulin Wang,

Xuming Hu,

Huiqi Li,

Linfeng Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Feihong and Wei, Qingyan and Tang, Jiayi and Li, Jiajun and Wang, Yulin and Hu, Xuming and Li, Huiqi and Zhang, Linfeng},
  title     = {{LazyMAR}: Accelerating Masked Autoregressive Models via Feature Caching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15552--15561},
}
OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning: Yuan Liu,

Saihui Hou,

Saijie Hou,

Jiabao Du,

Shibei Meng,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuan and Hou, Saihui and Hou, Saijie and Du, Jiabao and Meng, Shibei and Huang, Yongzhen},
  title     = {{OmniDiff}: A Comprehensive Benchmark for Fine-grained Image Difference Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21440--21449},
}
MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception: Changwon Kang,

Jisong Kim,

Hongjae Shin,

Junseo Park,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Changwon and Kim, Jisong and Shin, Hongjae and Park, Junseo and Choi, Jun Won},
  title     = {{MAESTRO}: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task {3D} Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28313--28323},
}
MM-Spatial: Exploring 3D Spatial Understanding in Multimodal LLMs: Erik Daxberger,

Nina Wenzel,

David Griffiths,

Haiming Gang,

Justin Lazarow,

Gefen Kohavi,

Kai Kang,

Marcin Eichner,

Yinfei Yang,

Afshin Dehghan,

Peter Grasch; [pdf] [supp]
[bibtex]
@InProceedings{Daxberger_2025_ICCV,
  author    = {Daxberger, Erik and Wenzel, Nina and Griffiths, David and Gang, Haiming and Lazarow, Justin and Kohavi, Gefen and Kang, Kai and Eichner, Marcin and Yang, Yinfei and Dehghan, Afshin and Grasch, Peter},
  title     = {{MM-Spatial}: Exploring {3D} Spatial Understanding in Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7395--7408},
}
CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance: Peiqi Chen,

Lei Yu,

Yi Wan,

Yingying Pei,

Xinyi Liu,

Yongxiang Yao,

Yingying Zhang,

Lixiang Ru,

Liheng Zhong,

Jingdong Chen,

Ming Yang,

Yongjun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Peiqi and Yu, Lei and Wan, Yi and Pei, Yingying and Liu, Xinyi and Yao, Yongxiang and Zhang, Yingying and Ru, Lixiang and Zhong, Liheng and Chen, Jingdong and Yang, Ming and Zhang, Yongjun},
  title     = {{CasP}: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28063--28072},
}
Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation: Shengqi Liu,

Yuhao Cheng,

Zhuo Chen,

Xingyu Ren,

Wenhan Zhu,

Lincheng Li,

Mengxiao Bi,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shengqi and Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Li, Lincheng and Bi, Mengxiao and Yang, Xiaokang and Yan, Yichao},
  title     = {Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17640--17650},
}
CObL: Toward Zero-Shot Ordinal Layering without User Prompting: Aneel Damaraju,

Dean Hazineh,

Todd Zickler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Damaraju_2025_ICCV,
  author    = {Damaraju, Aneel and Hazineh, Dean and Zickler, Todd},
  title     = {{CObL}: Toward Zero-Shot Ordinal Layering without User Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8154--8164},
}
Rethinking the Upsampling Process in Light Field Super-Resolution with Spatial-Epipolar Implicit Image Function: Ruixuan Cong,

Yu Wang,

Mingyuan Zhao,

Da Yang,

Rongshan Chen,

Hao Sheng; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2025_ICCV,
  author    = {Cong, Ruixuan and Wang, Yu and Zhao, Mingyuan and Yang, Da and Chen, Rongshan and Sheng, Hao},
  title     = {Rethinking the Upsampling Process in Light Field Super-Resolution with Spatial-Epipolar Implicit Image Function},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7559--7569},
}
GGTalker: Talking Head Systhesis with Generalizable Gaussian Priors and Identity-Specific Adaptation: Wentao Hu,

Shunkai Li,

Ziqiao Peng,

Haoxian Zhang,

Fan Shi,

Xiaoqiang Liu,

Pengfei Wan,

Di Zhang,

Hui Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Wentao and Li, Shunkai and Peng, Ziqiao and Zhang, Haoxian and Shi, Fan and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Tian, Hui},
  title     = {{GGTalker}: Talking Head Systhesis with Generalizable {Gaussian} Priors and Identity-Specific Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10108--10117},
}
SAC-GNC: SAmple Consensus for adaptive Graduated Non-Convexity: Valter Piedade,

Chitturi Sidhartha,

José Gaspar,

Venu Madhav Govindu,

Pedro Miraldo; [pdf] [supp]
[bibtex]
@InProceedings{Piedade_2025_ICCV,
  author    = {Piedade, Valter and Sidhartha, Chitturi and Gaspar, Jos{\'e} and Govindu, Venu Madhav and Miraldo, Pedro},
  title     = {{SAC-GNC}: {SAmple} Consensus for adaptive Graduated Non-Convexity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5780--5790},
}
VGGSounder: Audio-Visual Evaluations for Foundation Models: Daniil Zverev,

Thaddäus Wiedemer,

Ameya Prabhu,

Matthias Bethge,

Wieland Brendel,

A. Sophia Koepke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zverev_2025_ICCV,
  author    = {Zverev, Daniil and Wiedemer, Thadd{\"a}us and Prabhu, Ameya and Bethge, Matthias and Brendel, Wieland and Koepke, A. Sophia},
  title     = {{VGGSounder}: Audio-Visual Evaluations for Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1027--1037},
}
ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers: Nicholas S. DiBrita,

Jason Han,

Tirthak Patel; [pdf] [arXiv]
[bibtex]
@InProceedings{DiBrita_2025_ICCV,
  author    = {DiBrita, Nicholas S. and Han, Jason and Patel, Tirthak},
  title     = {{ResQ}: A Novel Framework to Implement Residual Neural Networks on Analog {Rydberg} Atom Quantum Computers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20085--20094},
}
Make Me Happier: Evoking Emotions Through Image Diffusion Models: Qing Lin,

Jingfeng Zhang,

Yew-Soon Ong,

Mengmi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Qing and Zhang, Jingfeng and Ong, Yew-Soon and Zhang, Mengmi},
  title     = {Make Me Happier: Evoking Emotions Through Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16367--16376},
}
SimpleVQA: Multimodal Factuality Evaluation for Multimodal Large Language Models: Xianfu Cheng,

Wei Zhang,

Shiwei Zhang,

Jian Yang,

Xiangyuan Guan,

Xianjie Wu,

Xiang Li,

Ge Zhang,

Jiaheng Liu,

Yuying Mai,

Yutao Zeng,

Zhoufutu Wen,

Ke Jin,

Baorui Wang,

Weixiao Zhou,

Yunhong Lu,

Hangyuan Ji,

Tongliang Li,

Wenhao Huang,

Zhoujun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Xianfu and Zhang, Wei and Zhang, Shiwei and Yang, Jian and Guan, Xiangyuan and Wu, Xianjie and Li, Xiang and Zhang, Ge and Liu, Jiaheng and Mai, Yuying and Zeng, Yutao and Wen, Zhoufutu and Jin, Ke and Wang, Baorui and Zhou, Weixiao and Lu, Yunhong and Ji, Hangyuan and Li, Tongliang and Huang, Wenhao and Li, Zhoujun},
  title     = {{SimpleVQA}: Multimodal Factuality Evaluation for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4637--4646},
}
DiTFastAttnV2: Head-wise Attention Compression for Multi-Modality Diffusion Transformers: Hanling Zhang,

Rundong Su,

Zhihang Yuan,

Pengtao Chen,

Mingzhu Shen,

Yibo Fan,

Shengen Yan,

Guohao Dai,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hanling and Su, Rundong and Yuan, Zhihang and Chen, Pengtao and Shen, Mingzhu and Fan, Yibo and Yan, Shengen and Dai, Guohao and Wang, Yu},
  title     = {{DiTFastAttnV2}: Head-wise Attention Compression for Multi-Modality Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16399--16409},
}
Find a Scapegoat: Poisoning Membership Inference Attack and Defense to Federated Learning: Wenjin Mo,

Zhiyuan Li,

Minghong Fang,

Mingwei Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2025_ICCV,
  author    = {Mo, Wenjin and Li, Zhiyuan and Fang, Minghong and Fang, Mingwei},
  title     = {Find a Scapegoat: Poisoning Membership Inference Attack and Defense to Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3967--3976},
}
Generalized Tensor-based Parameter-Efficient Fine-Tuning via Lie Group Transformations: Chongjie Si,

Zhiyi Shi,

Xuehui Wang,

Yichen Xiao,

Xiaokang Yang,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Si_2025_ICCV,
  author    = {Si, Chongjie and Shi, Zhiyi and Wang, Xuehui and Xiao, Yichen and Yang, Xiaokang and Shen, Wei},
  title     = {Generalized Tensor-based Parameter-Efficient Fine-Tuning via {Lie} Group Transformations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {197--207},
}
GAS: Generative Avatar Synthesis from a Single Image: Yixing Lu,

Junting Dong,

Youngjoong Kwon,

Qin Zhao,

Bo Dai,

Fernando De la Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yixing and Dong, Junting and Kwon, Youngjoong and Zhao, Qin and Dai, Bo and De la Torre, Fernando},
  title     = {{GAS}: Generative Avatar Synthesis from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12883--12893},
}
GenM3: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation: Junyu Shi,

Lijiang Liu,

Yong Sun,

Zhiyuan Zhang,

Jinni Zhou,

Qiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Junyu and Liu, Lijiang and Sun, Yong and Zhang, Zhiyuan and Zhou, Jinni and Nie, Qiang},
  title     = {{GenM3}: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13129--13139},
}
Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography: Tianao Li,

Manxiu Cui,

Cheng Ma,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Tianao and Cui, Manxiu and Ma, Cheng and Alexander, Emma},
  title     = {Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27466--27475},
}
Granular Concept Circuits: Toward a Fine-Grained Circuit Discovery for Concept Representations: Dahee Kwon,

Sehyun Lee,

Jaesik Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_ICCV,
  author    = {Kwon, Dahee and Lee, Sehyun and Choi, Jaesik},
  title     = {Granular Concept Circuits: Toward a Fine-Grained Circuit Discovery for Concept Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2356--2365},
}
Diversity-Enhanced Distribution Alignment for Dataset Distillation: Hongcheng Li,

Yucan Zhou,

Xiaoyan Gu,

Bo Li,

Weiping Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hongcheng and Zhou, Yucan and Gu, Xiaoyan and Li, Bo and Wang, Weiping},
  title     = {Diversity-Enhanced Distribution Alignment for Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3747--3756},
}
OCR Hinders RAG: Evaluating the Cascading Impact of OCR on Retrieval-Augmented Generation: Junyuan Zhang,

Qintong Zhang,

Bin Wang,

Linke Ouyang,

Zichen Wen,

Ying Li,

Ka-Ho Chow,

Conghui He,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Junyuan and Zhang, Qintong and Wang, Bin and Ouyang, Linke and Wen, Zichen and Li, Ying and Chow, Ka-Ho and He, Conghui and Zhang, Wentao},
  title     = {{OCR} Hinders {RAG}: Evaluating the Cascading Impact of {OCR} on Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17443--17453},
}
HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss: Yi Huang,

Ke Zhang,

Wei Liu,

Yuanyuan Wang,

Vishal M. Patel,

Le Lu,

Xu Han,

Dakai Jin,

Ke Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yi and Zhang, Ke and Liu, Wei and Wang, Yuanyuan and Patel, Vishal M. and Lu, Le and Han, Xu and Jin, Dakai and Yan, Ke},
  title     = {{HarmonySeg}: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23571--23581},
}
Semi-supervised Concept Bottleneck Models: Lijie Hu,

Tianhao Huang,

Huanyi Xie,

Xilin Gong,

Chenyang Ren,

Zhengyu Hu,

Lu Yu,

Ping Ma,

Di Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Lijie and Huang, Tianhao and Xie, Huanyi and Gong, Xilin and Ren, Chenyang and Hu, Zhengyu and Yu, Lu and Ma, Ping and Wang, Di},
  title     = {Semi-supervised Concept Bottleneck Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2110--2119},
}
Can Knowledge be Transferred from Unimodal to Multimodal? Investigating the Transitivity of Multimodal Knowledge Editing: Lingyong Fang,

Xinzhong Wang,

Depeng Wang,

Zongru Wu,

Ya Guo,

Huijia Zhu,

Zhuosheng Zhang,

Gongshen Liu; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Lingyong and Wang, Xinzhong and Wang, Depeng and Wu, Zongru and Guo, Ya and Zhu, Huijia and Zhang, Zhuosheng and Liu, Gongshen},
  title     = {Can Knowledge be Transferred from Unimodal to Multimodal? {Investigating} the Transitivity of Multimodal Knowledge Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2482--2490},
}
Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding: Yiming Zhang,

Zhuokai Zhao,

Zhaorun Chen,

Zenghui Ding,

Xianjun Yang,

Yining Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Ding, Zenghui and Yang, Xianjun and Sun, Yining},
  title     = {Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22046--22055},
}
CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds: Feng Yang,

Yichao Cao,

Xiu Su,

Dan Niu,

Xuanpeng Li; [pdf]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Feng and Cao, Yichao and Su, Xiu and Niu, Dan and Li, Xuanpeng},
  title     = {{CounterPC}: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24760--24769},
}
MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation: Fu Rong,

Meng Lan,

Qian Zhang,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2025_ICCV,
  author    = {Rong, Fu and Lan, Meng and Zhang, Qian and Zhang, Lefei},
  title     = {{MPG-SAM} 2: Adapting {SAM} 2 with Mask Priors and Global Context for Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23979--23989},
}
FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction: Jiale Xu,

Shenghua Gao,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiale and Gao, Shenghua and Shan, Ying},
  title     = {{FreeSplatter}: Pose-free {Gaussian} Splatting for Sparse-view {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25442--25452},
}
FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers: Haisheng Su,

Junjie Zhang,

Feixiang Song,

Sanping Zhou,

Wei Wu,

Junchi Yan,

Nanning Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Haisheng and Zhang, Junjie and Song, Feixiang and Zhou, Sanping and Wu, Wei and Yan, Junchi and Zheng, Nanning},
  title     = {{FreqPDE}: Rethinking Positional Depth Embedding for Multi-View {3D} Object Detection Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28145--28155},
}
Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval: Zhe Li,

Lei Zhang,

Zheren Fu,

Kun Zhang,

Zhendong Mao; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhe and Zhang, Lei and Fu, Zheren and Zhang, Kun and Mao, Zhendong},
  title     = {Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24319--24329},
}
WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image: Yuci Liang,

Xinheng Lyu,

Wenting Chen,

Meidan Ding,

Jipeng Zhang,

Xiangjian He,

Song Wu,

Xiaohan Xing,

Sen Yang,

Xiyue Wang,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yuci and Lyu, Xinheng and Chen, Wenting and Ding, Meidan and Zhang, Jipeng and He, Xiangjian and Wu, Song and Xing, Xiaohan and Yang, Sen and Wang, Xiyue and Shen, Linlin},
  title     = {{WSI-LLaVA}: A Multimodal Large Language Model for Whole Slide Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22718--22727},
}
GLEAM: Learning Generalizable Exploration Policy for Active Mapping in Complex 3D Indoor Scene: Xiao Chen,

Tai Wang,

Quanyi Li,

Tao Huang,

Jiangmiao Pang,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiao and Wang, Tai and Li, Quanyi and Huang, Tao and Pang, Jiangmiao and Xue, Tianfan},
  title     = {{GLEAM}: Learning Generalizable Exploration Policy for Active Mapping in Complex {3D} Indoor Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5558--5568},
}
DialNav: Multi-turn Dialog Navigation with a Remote Guide: Leekyeung Han,

Hyunji Min,

Gyeom Hwangbo,

Jonghyun Choi,

Paul Hongsuck Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Leekyeung and Min, Hyunji and Hwangbo, Gyeom and Choi, Jonghyun and Seo, Paul Hongsuck},
  title     = {{DialNav}: Multi-turn Dialog Navigation with a Remote Guide},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8514--8523},
}
VLDrive: Vision-Augmented Lightweight MLLMs for Efficient Language-grounded Autonomous Driving: Ruifei Zhang,

Wei Zhang,

Xiao Tan,

Sibei Yang,

Xiang Wan,

Xiaonan Luo,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruifei and Zhang, Wei and Tan, Xiao and Yang, Sibei and Wan, Xiang and Luo, Xiaonan and Li, Guanbin},
  title     = {{VLDrive}: Vision-Augmented Lightweight {MLLMs} for Efficient Language-grounded Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5923--5933},
}
Ponimator: Unfolding Interactive Pose for Versatile Human-human Interaction Animation: Shaowei Liu,

Chuan Guo,

Bing Zhou,

Jian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shaowei and Guo, Chuan and Zhou, Bing and Wang, Jian},
  title     = {Ponimator: Unfolding Interactive Pose for Versatile Human-human Interaction Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12068--12077},
}
Streamlining Image Editing with Layered Diffusion Brushes: Peyman Gholami,

Robert Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gholami_2025_ICCV,
  author    = {Gholami, Peyman and Xiao, Robert},
  title     = {Streamlining Image Editing with Layered Diffusion Brushes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17368--17378},
}
ObjectMate: A Recurrence Prior for Object Insertion and Subject-Driven Generation: Daniel Winter,

Asaf Shul,

Matan Cohen,

Dana Berman,

Yael Pritch,

Alex Rav-Acha,

Yedid Hoshen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Winter_2025_ICCV,
  author    = {Winter, Daniel and Shul, Asaf and Cohen, Matan and Berman, Dana and Pritch, Yael and Rav-Acha, Alex and Hoshen, Yedid},
  title     = {{ObjectMate}: A Recurrence Prior for Object Insertion and Subject-Driven Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16281--16291},
}
CycleVAR: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation: Yi Liu,

Shengqian Li,

Zuzeng Lin,

Feng Wang,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yi and Li, Shengqian and Lin, Zuzeng and Wang, Feng and Liu, Si},
  title     = {{CycleVAR}: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15194--15204},
}
Synthetic Video Enhances Physical Fidelity in Video Synthesis: Qi Zhao,

Xingyu Ni,

Ziyu Wang,

Feng Cheng,

Ziyan Yang,

Lu Jiang,

Bohan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Qi and Ni, Xingyu and Wang, Ziyu and Cheng, Feng and Yang, Ziyan and Jiang, Lu and Wang, Bohan},
  title     = {Synthetic Video Enhances Physical Fidelity in Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12135--12146},
}
Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning: Jun Li,

Jinpeng Wang,

Chaolei Tan,

Niu Lian,

Long Chen,

Yaowei Wang,

Min Zhang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jun and Wang, Jinpeng and Tan, Chaolei and Lian, Niu and Chen, Long and Wang, Yaowei and Zhang, Min and Xia, Shu-Tao and Chen, Bin},
  title     = {Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23074--23084},
}
Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter: Jianhui Zhang,

Shen Cheng,

Qirui Sun,

Jia Liu,

Wang Luyang,

Chaoyu Feng,

Chen Fang,

Lei Lei,

Jue Wang,

Shuaicheng Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jianhui and Cheng, Shen and Sun, Qirui and Liu, Jia and Luyang, Wang and Feng, Chaoyu and Fang, Chen and Lei, Lei and Wang, Jue and Liu, Shuaicheng},
  title     = {Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16991--17000},
}
OURO: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding: Tianrun Xu,

Guanyu Chen,

Ye Li,

Yuxin Xi,

Zeyu Mu,

Ruichen Wang,

Tianren Zhang,

Haichuan Gao,

Feng Chen; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Tianrun and Chen, Guanyu and Li, Ye and Xi, Yuxin and Mu, Zeyu and Wang, Ruichen and Zhang, Tianren and Gao, Haichuan and Chen, Feng},
  title     = {{OURO}: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18240--18251},
}
DiGA3D: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile 3D Inpainting: Jingyi Pan,

Dan Xu,

Qiong Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Jingyi and Xu, Dan and Luo, Qiong},
  title     = {{DiGA3D}: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile {3D} Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16345--16355},
}
Text-to-Any-Skeleton Motion Generation Without Retargeting: Qingyuan Liu,

Ke Lv,

Kun Dong,

Jian Xue,

Zehai Niu,

Jinbao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Qingyuan and Lv, Ke and Dong, Kun and Xue, Jian and Niu, Zehai and Wang, Jinbao},
  title     = {Text-to-Any-Skeleton Motion Generation Without Retargeting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12926--12936},
}
EVDM: Event-based Real-world Video Deblurring with Mamba: Zhijing Sun,

Senyan Xu,

Kean Liu,

Runze Tian,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zhijing and Xu, Senyan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {{EVDM}: Event-based Real-world Video Deblurring with {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13793--13803},
}
Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement: Ruitao Wu,

Yifan Zhao,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Ruitao and Zhao, Yifan and Li, Jia},
  title     = {Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21623--21634},
}
Towards Fine-grained Interactive Segmentation in Images and Videos: Yuan Yao,

Qiushi Yang,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Yuan and Yang, Qiushi and Cui, Miaomiao and Bo, Liefeng},
  title     = {Towards Fine-grained Interactive Segmentation in Images and Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22509--22518},
}
FaceXFormer: A Unified Transformer for Facial Analysis: Kartik Narayan,

Vibashan VS,

Rama Chellappa,

Vishal M. Patel; [pdf] [arXiv]
[bibtex]
@InProceedings{Narayan_2025_ICCV,
  author    = {Narayan, Kartik and VS, Vibashan and Chellappa, Rama and Patel, Vishal M.},
  title     = {{FaceXFormer}: A Unified Transformer for Facial Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11369--11382},
}
Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions: Nicolai Hermann,

Jorge Condor,

Piotr Didyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hermann_2025_ICCV,
  author    = {Hermann, Nicolai and Condor, Jorge and Didyk, Piotr},
  title     = {Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in {3D} Scene Reconstructions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28881--28891},
}
InstantEdit: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow: Yiming Gong,

Zhen Zhu,

Minjia Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Yiming and Zhu, Zhen and Zhang, Minjia},
  title     = {{InstantEdit}: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16808--16817},
}
The Source Image is the Best Attention for Infrared and Visible Image Fusion: Song Wang,

Xie Han,

Liqun Kuang,

Boying Wang,

Zhongyu Chen,

Zherui Qiao,

Fan Yang,

Xiaoxia Liu,

Bingyu Zhang,

Zhixun Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Song and Han, Xie and Kuang, Liqun and Wang, Boying and Chen, Zhongyu and Qiao, Zherui and Yang, Fan and Liu, Xiaoxia and Zhang, Bingyu and Wang, Zhixun},
  title     = {The Source Image is the Best Attention for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13513--13522},
}
Princeton365: A Diverse Dataset with Accurate Camera Pose: Karhan Kayan,

Stamatis Alexandropoulos,

Rishabh Jain,

Yiming Zuo,

Erich Liang,

Jia Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kayan_2025_ICCV,
  author    = {Kayan, Karhan and Alexandropoulos, Stamatis and Jain, Rishabh and Zuo, Yiming and Liang, Erich and Deng, Jia},
  title     = {{Princeton365}: A Diverse Dataset with Accurate Camera Pose},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7645--7654},
}
Towards Immersive Human-X Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis: Kaiyang Ji,

Ye Shi,

Zichen Jin,

Kangyi Chen,

Lan Xu,

Yuexin Ma,

Jingyi Yu,

Jingya Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Kaiyang and Shi, Ye and Jin, Zichen and Chen, Kangyi and Xu, Lan and Ma, Yuexin and Yu, Jingyi and Wang, Jingya},
  title     = {Towards Immersive {Human-X} Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10173--10183},
}
Generative Video Bi-flow: Chen Liu,

Tobias Ritschel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chen and Ritschel, Tobias},
  title     = {Generative Video Bi-flow},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19363--19372},
}
AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion: Liuyue Xie,

Jiancong Guo,

Ozan Cakmakci,

Andre Araujo,

László A. Jeni,

Zhiheng Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Liuyue and Guo, Jiancong and Cakmakci, Ozan and Araujo, Andre and Jeni, L{\'a}szl{\'o} A. and Jia, Zhiheng},
  title     = {{AlignDiff}: Learning Physically-Grounded Camera Alignment via Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26901--26911},
}
EFTViT: Efficient Federated Training of Vision Transformers with Masked Images on Resource-Constrained Clients: Meihan Wu,

Tao Chang,

Cui Miao,

Jie Zhou,

Chun Li,

Xiangyu Xu,

Ming Li,

Xiaodong Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Meihan and Chang, Tao and Miao, Cui and Zhou, Jie and Li, Chun and Xu, Xiangyu and Li, Ming and Wang, Xiaodong},
  title     = {{EFTViT}: Efficient Federated Training of Vision Transformers with Masked Images on Resource-Constrained Clients},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1815--1824},
}
A Structure-aware and Motion-adaptive Framework for 3D Human Pose Estimation with Mamba: Ye Lu,

Jie Wang,

Jianjun Gao,

Rui Gong,

Chen Cai,

Kim-Hui Yap; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Ye and Wang, Jie and Gao, Jianjun and Gong, Rui and Cai, Chen and Yap, Kim-Hui},
  title     = {A Structure-aware and Motion-adaptive Framework for {3D} Human Pose Estimation with {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7958--7968},
}
Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm: Hongqiu Wang,

Wu Chen,

Xiangde Luo,

Zhaohu Xing,

Lihao Liu,

Jing Qin,

Shaozhi Wu,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongqiu and Chen, Wu and Luo, Xiangde and Xing, Zhaohu and Liu, Lihao and Qin, Jing and Wu, Shaozhi and Zhu, Lei},
  title     = {Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A {VLM}-Driven Active Domain Adaptation Paradigm},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24102--24112},
}
Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data: Nithin Gopalakrishnan Nair,

Srinivas Kaza,

Xuan Luo,

Vishal M. Patel,

Stephen Lombardi,

Jungyeon Park; [pdf] [supp]
[bibtex]
@InProceedings{Nair_2025_ICCV,
  author    = {Nair, Nithin Gopalakrishnan and Kaza, Srinivas and Luo, Xuan and Patel, Vishal M. and Lombardi, Stephen and Park, Jungyeon},
  title     = {Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28567--28576},
}
FED-PsyAU: Privacy-Preserving Micro-Expression Recognition via Psychological AU Coordination and Dynamic Facial Motion Modeling: Jingting Li,

Yu Qian,

Lin Zhao,

Su-Jing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jingting and Qian, Yu and Zhao, Lin and Wang, Su-Jing},
  title     = {{FED-PsyAU}: Privacy-Preserving Micro-Expression Recognition via Psychological {AU} Coordination and Dynamic Facial Motion Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14453--14463},
}
SAFER: Sharpness Aware layer-selective Finetuning for Enhanced Robustness in vision transformers: Bhavna Gopal,

Huanrui Yang,

Mark Horton,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gopal_2025_ICCV,
  author    = {Gopal, Bhavna and Yang, Huanrui and Horton, Mark and Chen, Yiran},
  title     = {{SAFER}: Sharpness Aware layer-selective Finetuning for Enhanced Robustness in vision transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3999--4008},
}
ClearSight: Human Vision-Inspired Solutions for Event-Based Motion Deblurring: Xiaopeng Lin,

Yulong Huang,

Hongwei Ren,

Zunchang Liu,

Hongxiang Huang,

Yue Zhou,

Haotian Fu,

Bojun Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Xiaopeng and Huang, Yulong and Ren, Hongwei and Liu, Zunchang and Huang, Hongxiang and Zhou, Yue and Fu, Haotian and Cheng, Bojun},
  title     = {{ClearSight}: Human Vision-Inspired Solutions for Event-Based Motion Deblurring},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7462--7471},
}
Event-aided Dense and Continuous Point Tracking: Everywhere and Anytime: Zhexiong Wan,

Jianqin Luo,

Yuchao Dai,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Zhexiong and Luo, Jianqin and Dai, Yuchao and Lee, Gim Hee},
  title     = {Event-aided Dense and Continuous Point Tracking: Everywhere and Anytime},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7936--7946},
}
Stable-Sim2Real: Exploring Simulation of Real-Captured 3D Data with Two-Stage Depth Diffusion: Mutian Xu,

Chongjie Ye,

Haolin Liu,

Yushuang Wu,

Jiahao Chang,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Mutian and Ye, Chongjie and Liu, Haolin and Wu, Yushuang and Chang, Jiahao and Han, Xiaoguang},
  title     = {{Stable-Sim2Real}: Exploring Simulation of Real-Captured {3D} Data with Two-Stage Depth Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2609--2619},
}
Towards Real Unsupervised Anomaly Detection Via Confident Meta-Learning: Muhammad Aqeel,

Shakiba Sharifi,

Marco Cristani,

Francesco Setti; [pdf] [arXiv]
[bibtex]
@InProceedings{Aqeel_2025_ICCV,
  author    = {Aqeel, Muhammad and Sharifi, Shakiba and Cristani, Marco and Setti, Francesco},
  title     = {Towards Real Unsupervised Anomaly Detection Via Confident Meta-Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4858--4867},
}
Multi-Schema Proximity Network for Composed Image Retrieval: Jiangming Shi,

Xiangbo Yin,

Yeyun Chen,

Yachao Zhang,

Zhizhong Zhang,

Yuan Xie,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Jiangming and Yin, Xiangbo and Chen, Yeyun and Zhang, Yachao and Zhang, Zhizhong and Xie, Yuan and Qu, Yanyun},
  title     = {Multi-Schema Proximity Network for Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19999--20008},
}
Unified Video Generation via Next-Set Prediction in Continuous Domain: Zhanzhou Feng,

Qingpei Guo,

Xinyu Xiao,

Ruihan Xu,

Ming Yang,

Shiliang Zhang; [pdf]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Zhanzhou and Guo, Qingpei and Xiao, Xinyu and Xu, Ruihan and Yang, Ming and Zhang, Shiliang},
  title     = {Unified Video Generation via Next-Set Prediction in Continuous Domain},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19427--19438},
}
Latent-Reframe: Enabling Camera Control for Video Diffusion Models without Training: Zhenghong Zhou,

Jie An,

Jiebo Luo; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhenghong and An, Jie and Luo, Jiebo},
  title     = {{Latent-Reframe}: Enabling Camera Control for Video Diffusion Models without Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12779--12789},
}
EditCLIP: Representation Learning for Image Editing: Qian Wang,

Aleksandar Cvejić,

Abdelrahman Eldesokey,

Peter Wonka; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qian and Cveji{\'c}, Aleksandar and Eldesokey, Abdelrahman and Wonka, Peter},
  title     = {{EditCLIP}: Representation Learning for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15960--15970},
}
Efficient Event Camera Data Pretraining with Adaptive Prompt Fusion: Quanmin Liang,

Qiang Li,

Shuai Liu,

Xinzi Cao,

Jinyi Lu,

Feidiao Yang,

Wei Zhang,

Kai Huang,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Quanmin and Li, Qiang and Liu, Shuai and Cao, Xinzi and Lu, Jinyi and Yang, Feidiao and Zhang, Wei and Huang, Kai and Tian, Yonghong},
  title     = {Efficient Event Camera Data Pretraining with Adaptive Prompt Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8656--8667},
}
Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images: Elena Buglakova,

Anwai Archit,

Edoardo D'Imprima,

Julia Mahamid,

Constantin Pape,

Anna Kreshuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Buglakova_2025_ICCV,
  author    = {Buglakova, Elena and Archit, Anwai and D'Imprima, Edoardo and Mahamid, Julia and Pape, Constantin and Kreshuk, Anna},
  title     = {Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13109--13118},
}
Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning: Yiyang Chen,

Shanshan Zhao,

Lunhao Duan,

Changxing Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiyang and Zhao, Shanshan and Duan, Lunhao and Ding, Changxing and Tao, Dacheng},
  title     = {Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26156--26166},
}
Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding: Jiaxuan Chen,

Yu Qi,

Yueming Wang,

Gang Pan; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiaxuan and Qi, Yu and Wang, Yueming and Pan, Gang},
  title     = {Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21938--21948},
}
Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection: Yujeong Chae,

Heejun Park,

Hyeonseong Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Chae_2025_ICCV,
  author    = {Chae, Yujeong and Park, Heejun and Kim, Hyeonseong and Yoon, Kuk-Jin},
  title     = {Doppler-Aware {LiDAR-RADAR} Fusion for Weather-Robust {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27197--27208},
}
RoboAnnotatorX: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration: Longxin Kou,

Fei Ni,

Yan Zheng,

Peilong Han,

Jinyi Liu,

Haiqin Cui,

Rui Liu,

Jianye Hao; [pdf] [supp]
[bibtex]
@InProceedings{Kou_2025_ICCV,
  author    = {Kou, Longxin and Ni, Fei and Zheng, Yan and Han, Peilong and Liu, Jinyi and Cui, Haiqin and Liu, Rui and Hao, Jianye},
  title     = {{RoboAnnotatorX}: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10353--10363},
}
On-Device Diffusion Transformer Policy for Efficient Robot Manipulation: Yiming Wu,

Huan Wang,

Zhenghao Chen,

Jianxin Pang,

Dong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yiming and Wang, Huan and Chen, Zhenghao and Pang, Jianxin and Xu, Dong},
  title     = {On-Device Diffusion Transformer Policy for Efficient Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14073--14083},
}
Unsupervised Part Discovery via Descriptor-Based Masked Image Restoration with Optimized Constraints: Jiahao Xia,

Yike Wu,

Wenjian Huang,

Jianguo Zhang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Jiahao and Wu, Yike and Huang, Wenjian and Zhang, Jianguo and Zhang, Jian},
  title     = {Unsupervised Part Discovery via Descriptor-Based Masked Image Restoration with Optimized Constraints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8668--8677},
}
VideoOrion: Tokenizing Object Dynamics in Videos: Yicheng Feng,

Yijiang Li,

Wanpeng Zhang,

Sipeng Zheng,

Hao Luo,

Zihao Yue,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yicheng and Li, Yijiang and Zhang, Wanpeng and Zheng, Sipeng and Luo, Hao and Yue, Zihao and Lu, Zongqing},
  title     = {{VideoOrion}: Tokenizing Object Dynamics in Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20401--20412},
}
Training-Free Text-Guided Image Editing with Visual Autoregressive Model: Yufei Wang,

Lanqing Guo,

Zhihao Li,

Jiaxing Huang,

Pichao Wang,

Bihan Wen,

Jian Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yufei and Guo, Lanqing and Li, Zhihao and Huang, Jiaxing and Wang, Pichao and Wen, Bihan and Wang, Jian},
  title     = {Training-Free Text-Guided Image Editing with Visual Autoregressive Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17577--17586},
}
Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis: Xiwen Chen,

Peijie Qiu,

Wenhui Zhu,

Hao Wang,

Huayu Li,

Xuanzhao Dong,

Xiaotong Sun,

Xiaobing Yu,

Yalin Wang,

Abolfazl Razi,

Aristeidis Sotiras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiwen and Qiu, Peijie and Zhu, Wenhui and Wang, Hao and Li, Huayu and Dong, Xuanzhao and Sun, Xiaotong and Yu, Xiaobing and Wang, Yalin and Razi, Abolfazl and Sotiras, Aristeidis},
  title     = {Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21353--21363},
}
SPD: Shallow Backdoor Protecting Deep Backdoor Against Backdoor Detection: Shunjie Yuan,

Xinghua Li,

Xuelin Cao,

Haiyan Zhang,

Mengyao Zhu,

Robert H. Deng; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Shunjie and Li, Xinghua and Cao, Xuelin and Zhang, Haiyan and Zhu, Mengyao and Deng, Robert H.},
  title     = {{SPD}: Shallow Backdoor Protecting Deep Backdoor Against Backdoor Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4029--4038},
}
RealGeneral: Unifying Visual Generation via Temporal In-Context Learning with Video Models: Yijing Lin,

Mengqi Huang,

Shuhan Zhuang,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Yijing and Huang, Mengqi and Zhuang, Shuhan and Mao, Zhendong},
  title     = {{RealGeneral}: Unifying Visual Generation via Temporal In-Context Learning with Video Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14994--15004},
}
Mamba-3VL: Taming State Space Model for 3D Vision Language Learning: Yuan Wang,

Yuxin Chen,

Zhongang Qi,

Lijun Liu,

Jile Jiao,

Xuetao Feng,

Yujia Liang,

Ying Shan,

Zhipeng Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuan and Chen, Yuxin and Qi, Zhongang and Liu, Lijun and Jiao, Jile and Feng, Xuetao and Liang, Yujia and Shan, Ying and Zhang, Zhipeng},
  title     = {{Mamba-3VL}: Taming State Space Model for {3D} Vision Language Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6273--6283},
}
Fine-structure Preserved Real-world Image Super-resolution via Transfer VAE Training: Qiaosi Yi,

Shuai Li,

Rongyuan Wu,

Lingchen Sun,

Yuhui Wu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Wu, Yuhui and Zhang, Lei},
  title     = {Fine-structure Preserved Real-world Image Super-resolution via Transfer {VAE} Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12415--12426},
}
FlowR: Flowing from Sparse to Dense 3D Reconstructions: Tobias Fischer,

Samuel Rota Bulò,

Yung-Hsu Yang,

Nikhil Keetha,

Lorenzo Porzi,

Norman Müller,

Katja Schwarz,

Jonathon Luiten,

Marc Pollefeys,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Fischer_2025_ICCV,
  author    = {Fischer, Tobias and Rota Bul{\`o}, Samuel and Yang, Yung-Hsu and Keetha, Nikhil and Porzi, Lorenzo and M{\"u}ller, Norman and Schwarz, Katja and Luiten, Jonathon and Pollefeys, Marc and Kontschieder, Peter},
  title     = {{FlowR}: Flowing from Sparse to Dense {3D} Reconstructions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27702--27712},
}
Augmented Mass-Spring Model for Real-Time Dense Hair Simulation: J. Alejandro Amador H.,

Yi Zhou,

Xin Sun,

Zhixin Shu,

Chengan He,

Soren Pirk,

Dominik L. Michels; [pdf] [supp]
[bibtex]
@InProceedings{H._2025_ICCV,
  author    = {Amador H., J. Alejandro and Zhou, Yi and Sun, Xin and Shu, Zhixin and He, Chengan and Pirk, Soren and Michels, Dominik L.},
  title     = {Augmented Mass-Spring Model for Real-Time Dense Hair Simulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11339--11347},
}
IQA-Adapter: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models: Khaled Abud,

Sergey Lavrushkin,

Alexey Kirillov,

Dmitriy Vatolin; [pdf] [supp]
[bibtex]
@InProceedings{Abud_2025_ICCV,
  author    = {Abud, Khaled and Lavrushkin, Sergey and Kirillov, Alexey and Vatolin, Dmitriy},
  title     = {{IQA-Adapter}: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15469--15480},
}
SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation: Jiayuan Zhu,

Junde Wu,

Cheng Ouyang,

Konstantinos Kamnitsas,

J. Alison Noble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiayuan and Wu, Junde and Ouyang, Cheng and Kamnitsas, Konstantinos and Noble, J. Alison},
  title     = {{SPA}: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23731--23740},
}
GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives: Weihao Yu,

Xiaoqing Guo,

Xinyu Liu,

Yifan Liu,

Hao Zheng,

Yawen Huang,

Yixuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Weihao and Guo, Xiaoqing and Liu, Xinyu and Liu, Yifan and Zheng, Hao and Huang, Yawen and Yuan, Yixuan},
  title     = {{GaussianReg}: Rapid {2D/3D} Registration for Emergency Surgery via Explicit {3D} Modeling with {Gaussian} Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21482--21491},
}
Robustifying Zero-Shot Vision Language Models by Subspaces Alignment: Junhao Dong,

Piotr Koniusz,

Liaoyuan Feng,

Yifei Zhang,

Hao Zhu,

Weiming Liu,

Xinghua Qu,

Yew-Soon Ong; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Junhao and Koniusz, Piotr and Feng, Liaoyuan and Zhang, Yifei and Zhu, Hao and Liu, Weiming and Qu, Xinghua and Ong, Yew-Soon},
  title     = {Robustifying Zero-Shot Vision Language Models by Subspaces Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21037--21047},
}
Performing Defocus Deblurring by Modeling its Formation Process: Zhengbo Zhang,

Lin Geng Foo,

Hossein Rahmani,

Jun Liu,

De Wen Soh; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhengbo and Foo, Lin Geng and Rahmani, Hossein and Liu, Jun and Soh, De Wen},
  title     = {Performing Defocus Deblurring by Modeling its Formation Process},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5791--5801},
}
Towards Effective Foundation Model Adaptation for Extreme Cross-Domain Few-Shot Learning: Fei Zhou,

Peng Wang,

Lei Zhang,

Wei Wei,

Chen Ding,

Guosheng Lin,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Fei and Wang, Peng and Zhang, Lei and Wei, Wei and Ding, Chen and Lin, Guosheng and Zhang, Yanning},
  title     = {Towards Effective Foundation Model Adaptation for Extreme Cross-Domain Few-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4582--4593},
}
GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections: Haiyang Bai,

Jiaqi Zhu,

Songru Jiang,

Wei Huang,

Tao Lu,

Yuanqi Li,

Jie Guo,

Runze Fu,

Yanwen Guo,

Lijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Haiyang and Zhu, Jiaqi and Jiang, Songru and Huang, Wei and Lu, Tao and Li, Yuanqi and Guo, Jie and Fu, Runze and Guo, Yanwen and Chen, Lijun},
  title     = {{GaRe}: Relightable {3D} {Gaussian} Splatting for Outdoor Scenes from Unconstrained Photo Collections},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26456--26465},
}
LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment: Juelin Zhu,

Shuaibang Peng,

Long Wang,

Hanlin Tan,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Juelin and Peng, Shuaibang and Wang, Long and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen},
  title     = {{LoD-Loc} v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26610--26621},
}
RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis: Yifei Feng,

Mingxin Yang,

Shuhui Yang,

Sheng Zhang,

Jiaao Yu,

Zibo Zhao,

Yuhong Liu,

Jie Jiang,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao},
  title     = {{RomanTex}: Decoupling {3D}-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17203--17213},
}
Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment: Lijie Liu,

Tianxiang Ma,

Bingchuan Li,

Zhuowei Chen,

Jiawei Liu,

Gen Li,

Siyu Zhou,

Qian He,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lijie and Ma, Tianxiang and Li, Bingchuan and Chen, Zhuowei and Liu, Jiawei and Li, Gen and Zhou, Siyu and He, Qian and Wu, Xinglong},
  title     = {Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14951--14961},
}
EYE3: Turn Anything into Naked-eye 3D: Yingde Song,

Zongyuan Yang,

Baolin Liu,

Yongping Xiong,

Sai Chen,

Lan Yi,

Zhaohe Zhang,

Xunbo Yu; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yingde and Yang, Zongyuan and Liu, Baolin and Xiong, Yongping and Chen, Sai and Yi, Lan and Zhang, Zhaohe and Yu, Xunbo},
  title     = {{EYE3}: Turn Anything into Naked-eye {3D}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27862--27871},
}
Boosting Class Representation via Semantically Related Instances for Robust Long-Tailed Learning with Noisy Labels: Yuhang Li,

Zhuying Li,

Yuheng Jia; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuhang and Li, Zhuying and Jia, Yuheng},
  title     = {Boosting Class Representation via Semantically Related Instances for Robust Long-Tailed Learning with Noisy Labels},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1516--1525},
}
You Are Your Own Best Teacher: Achieving Centralized-level Performance in Federated Learning under Heterogeneous and Long-tailed Data: Shanshan Yan,

Zexi Li,

Chao Wu,

Meng Pang,

Yang Lu,

Yan Yan,

Hanzi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Shanshan and Li, Zexi and Wu, Chao and Pang, Meng and Lu, Yang and Yan, Yan and Wang, Hanzi},
  title     = {You Are Your Own Best Teacher: Achieving Centralized-level Performance in Federated Learning under Heterogeneous and Long-tailed Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2750--2759},
}
IFAdapter: Instance Feature Control for Grounded Text-to-Image Generation: Yinwei Wu,

Xianpan Zhou,

Bing Ma,

Xuefeng Su,

Kai Ma,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yinwei and Zhou, Xianpan and Ma, Bing and Su, Xuefeng and Ma, Kai and Wang, Xinchao},
  title     = {{IFAdapter}: Instance Feature Control for Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15949--15959},
}
StableDepth: Scene-Consistent and Scale-Invariant Monocular Depth: Zheng Zhang,

Lihe Yang,

Tianyu Yang,

Chaohui Yu,

Xiaoyang Guo,

Yixing Lao,

Hengshuang Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zheng and Yang, Lihe and Yang, Tianyu and Yu, Chaohui and Guo, Xiaoyang and Lao, Yixing and Zhao, Hengshuang}, title = {StableDepth: Scene-Consistent and Scale-Invariant Monocular Depth}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7069-7078} }
LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders: Ilan Naiman,

Emanuel Ben-Baruch,

Oron Anschel,

Alon Shoshan,

Igor Kviatkovsky,

Manoj Aggarwal,

Gerard Medioni; [pdf] [supp]
[bibtex]
@InProceedings{Naiman_2025_ICCV, author = {Naiman, Ilan and Ben-Baruch, Emanuel and Anschel, Oron and Shoshan, Alon and Kviatkovsky, Igor and Aggarwal, Manoj and Medioni, Gerard}, title = {LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21398-21407} }
Personalized Federated Learning under Local Supervision: Qiqi Liu,

Jiaqiang Li,

Yuchen Liu,

Yaochu Jin,

Lingjuan Lyu,

Xiaohu Wu,

Han Yu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Qiqi and Li, Jiaqiang and Liu, Yuchen and Jin, Yaochu and Lyu, Lingjuan and Wu, Xiaohu and Yu, Han}, title = {Personalized Federated Learning under Local Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4069-4079} }
STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning: Guilian Chen,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Guilian and Wu, Huisi and Qin, Jing}, title = {STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21364-21373} }
DIMO: Diverse 3D Motion Generation for Arbitrary Objects: Linzhan Mou,

Jiahui Lei,

Chen Wang,

Lingjie Liu,

Kostas Daniilidis; [pdf] [supp]
[bibtex]
@InProceedings{Mou_2025_ICCV, author = {Mou, Linzhan and Lei, Jiahui and Wang, Chen and Liu, Lingjie and Daniilidis, Kostas}, title = {DIMO: Diverse 3D Motion Generation for Arbitrary Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14357-14368} }
Enhancing Spatial Reasoning in Multimodal Large Language Models through Reasoning-based Segmentation: Zhenhua Ning,

Zhuotao Tian,

Shaoshuai Shi,

Guangming Lu,

Daojing He,

Wenjie Pei,

Li Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ning_2025_ICCV, author = {Ning, Zhenhua and Tian, Zhuotao and Shi, Shaoshuai and Lu, Guangming and He, Daojing and Pei, Wenjie and Jiang, Li}, title = {Enhancing Spatial Reasoning in Multimodal Large Language Models through Reasoning-based Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7851-7860} }
MMAD: Multi-label Micro-Action Detection in Videos: Kun Li,

Pengyu Liu,

Dan Guo,

Fei Wang,

Zhiliang Wu,

Hehe Fan,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Kun and Liu, Pengyu and Guo, Dan and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Wang, Meng}, title = {MMAD: Multi-label Micro-Action Detection in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13225-13236} }
Human-in-the-Loop Local Corrections of 3D Scene Layouts via Infilling: Christopher Xie,

Armen Avetisyan,

Henry Howard-Jenkins,

Yawar Siddiqui,

Julian Straub,

Richard Newcombe,

Vasileios Balntas,

Jakob Engel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV, author = {Xie, Christopher and Avetisyan, Armen and Howard-Jenkins, Henry and Siddiqui, Yawar and Straub, Julian and Newcombe, Richard and Balntas, Vasileios and Engel, Jakob}, title = {Human-in-the-Loop Local Corrections of 3D Scene Layouts via Infilling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5657-5666} }
2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update: Jeongyun Kim,

Seunghoon Jeong,

Giseop Kim,

Myung-Hwan Jeon,

Eunji Jun,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Jeongyun and Jeong, Seunghoon and Kim, Giseop and Jeon, Myung-Hwan and Jun, Eunji and Kim, Ayoung}, title = {2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27927-27936} }
HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models: Yiwen Chen,

Hieu T. Nguyen,

Vikram Voleti,

Varun Jampani,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Yiwen and Nguyen, Hieu T. and Voleti, Vikram and Jampani, Varun and Jiang, Huaizu}, title = {HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28440-28450} }
Fast Globally Optimal and Geometrically Consistent 3D Shape Matching: Paul Roetzer,

Florian Bernard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roetzer_2025_ICCV, author = {Roetzer, Paul and Bernard, Florian}, title = {Fast Globally Optimal and Geometrically Consistent 3D Shape Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {912-922} }
Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources: Alakh Desai,

Nuno Vasconcelos; [pdf] [supp]
[bibtex]
@InProceedings{Desai_2025_ICCV, author = {Desai, Alakh and Vasconcelos, Nuno}, title = {Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16122-16131} }
AlignGuard: Scalable Safety Alignment for Text-to-Image Generation: Runtao Liu,

I Chieh Chen,

Jindong Gu,

Jipeng Zhang,

Renjie Pi,

Qifeng Chen,

Philip Torr,

Ashkan Khakzar,

Fabio Pizzati; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Runtao and Chen, I Chieh and Gu, Jindong and Zhang, Jipeng and Pi, Renjie and Chen, Qifeng and Torr, Philip and Khakzar, Ashkan and Pizzati, Fabio}, title = {AlignGuard: Scalable Safety Alignment for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17024-17034} }
Function-centric Bayesian Network for Zero-Shot Object Goal Navigation: Sixian Zhang,

Xinyao Yu,

Xinhang Song,

Yiyao Wang,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Yiyao and Jiang, Shuqiang}, title = {Function-centric Bayesian Network for Zero-Shot Object Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19535-19545} }
Preserve Anything: Controllable Image Synthesis with Object Preservation: Prasen Kumar Sharma,

Neeraj Matiyali,

Siddharth Srivastava,

Gaurav Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2025_ICCV, author = {Sharma, Prasen Kumar and Matiyali, Neeraj and Srivastava, Siddharth and Sharma, Gaurav}, title = {Preserve Anything: Controllable Image Synthesis with Object Preservation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18058-18067} }
Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching: Tianli Liao,

Chenyang Zhao,

Lei Li,

Heling Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV, author = {Liao, Tianli and Zhao, Chenyang and Li, Lei and Cao, Heling}, title = {Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27262-27271} }
Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment: Shi-Chen Zhang,

Yunheng Li,

Yu-Huan Wu,

Qibin Hou,

Ming-Ming Cheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shi-Chen and Li, Yunheng and Wu, Yu-Huan and Hou, Qibin and Cheng, Ming-Ming}, title = {Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22361-22371} }
GIViC: Generative Implicit Video Compression: Ge Gao,

Siyue Teng,

Tianhao Peng,

Fan Zhang,

David Bull; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV, author = {Gao, Ge and Teng, Siyue and Peng, Tianhao and Zhang, Fan and Bull, David}, title = {GIViC: Generative Implicit Video Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17356-17367} }
TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset: Chang Liu,

Mingxu Zhu,

Zheyuan Zhang,

Linna Song,

Xiao Zhao,

Qingliang Luo,

Qi Wang,

Chufan Guo,

Kuifeng Su; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Chang and Zhu, Mingxu and Zhang, Zheyuan and Song, Linna and Zhao, Xiao and Luo, Qingliang and Wang, Qi and Guo, Chufan and Su, Kuifeng}, title = {TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26600-26609} }
Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework: Jian-Jian Jiang,

Xiao-Ming Wu,

Yi-Xiang He,

Ling-An Zeng,

Yi-Lin Wei,

Dandan Zhang,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV, author = {Jiang, Jian-Jian and Wu, Xiao-Ming and He, Yi-Xiang and Zeng, Ling-An and Wei, Yi-Lin and Zhang, Dandan and Zheng, Wei-Shi}, title = {Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12427-12437} }
LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents: Boyu Chen,

Zhengrong Yue,

Siran Chen,

Zikang Wang,

Yang Liu,

Peng Li,

Yali Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Boyu and Yue, Zhengrong and Chen, Siran and Wang, Zikang and Liu, Yang and Li, Peng and Wang, Yali}, title = {LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20237-20246} }
Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars: Tobias Kirschstein,

Javier Romero,

Artem Sevastopolsky,

Matthias Nießner,

Shunsuke Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kirschstein_2025_ICCV, author = {Kirschstein, Tobias and Romero, Javier and Sevastopolsky, Artem and Nie{\ss}ner, Matthias and Saito, Shunsuke}, title = {Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12089-12100} }
EVER: Exact Volumetric Ellipsoid Rendering for Real-time View Synthesis: Alexander Mai,

Peter Hedman,

George Kopanas,

Dor Verbin,

David Futschik,

Qiangeng Xu,

Falko Kuester,

Jonathan T. Barron,

Yinda Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2025_ICCV, author = {Mai, Alexander and Hedman, Peter and Kopanas, George and Verbin, Dor and Futschik, David and Xu, Qiangeng and Kuester, Falko and Barron, Jonathan T. and Zhang, Yinda}, title = {EVER: Exact Volumetric Ellipsoid Rendering for Real-time View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4930-4939} }
Democratizing High-Fidelity Co-Speech Gesture Video Generation: Xu Yang,

Shaoli Huang,

Shenbo Xie,

Xuelin Chen,

Yifei Liu,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV, author = {Yang, Xu and Huang, Shaoli and Xie, Shenbo and Chen, Xuelin and Liu, Yifei and Ding, Changxing}, title = {Democratizing High-Fidelity Co-Speech Gesture Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14283-14292} }
Dissecting Generalized Category Discovery: Multiplex Consensus under Self-Deconstruction: Luyao Tang,

Kunze Huang,

Chaoqi Chen,

Yuxuan Yuan,

Chenxin Li,

Xiaotong Tu,

Xinghao Ding,

Yue Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV, author = {Tang, Luyao and Huang, Kunze and Chen, Chaoqi and Yuan, Yuxuan and Li, Chenxin and Tu, Xiaotong and Ding, Xinghao and Huang, Yue}, title = {Dissecting Generalized Category Discovery: Multiplex Consensus under Self-Deconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {297-307} }
DiffIP: Representation Fingerprints for Robust IP Protection of Diffusion Models: Zhuoling Li,

Haoxuan Qu,

Jason Kuen,

Jiuxiang Gu,

Qiuhong Ke,

Jun Liu,

Hossein Rahmani; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Zhuoling and Qu, Haoxuan and Kuen, Jason and Gu, Jiuxiang and Ke, Qiuhong and Liu, Jun and Rahmani, Hossein}, title = {DiffIP: Representation Fingerprints for Robust IP Protection of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17035-17045} }
Divide-and-Conquer for Enhancing Unlabeled Learning, Stability, and Plasticity in Semi-supervised Continual Learning: Yue Duan,

Taicai Chen,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_ICCV, author = {Duan, Yue and Chen, Taicai and Qi, Lei and Shi, Yinghuan}, title = {Divide-and-Conquer for Enhancing Unlabeled Learning, Stability, and Plasticity in Semi-supervised Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {583-593} }
All Parts Matter: A Unified Mask-Free Virtual Try-On Framework: Chenghu Du,

Shengwu Xiong,

Yi Rong; [pdf]
[bibtex]
@InProceedings{Du_2025_ICCV, author = {Du, Chenghu and Xiong, Shengwu and Rong, Yi}, title = {All Parts Matter: A Unified Mask-Free Virtual Try-On Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19525-19534} }
CogNav: Cognitive Process Modeling for Object Goal Navigation with LLMs: Yihan Cao,

Jiazhao Zhang,

Zhinan Yu,

Shuzhen Liu,

Zheng Qin,

Qin Zou,

Bo Du,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV, author = {Cao, Yihan and Zhang, Jiazhao and Yu, Zhinan and Liu, Shuzhen and Qin, Zheng and Zou, Qin and Du, Bo and Xu, Kai}, title = {CogNav: Cognitive Process Modeling for Object Goal Navigation with LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9550-9560} }
Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image: Shin Ishihara,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Ishihara_2025_ICCV, author = {Ishihara, Shin and Sato, Imari}, title = {Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26827-26836} }
Explaining Human Preferences via Metrics for Structured 3D Reconstruction: Jack Langerman,

Denys Rozumnyi,

Yuzhong Huang,

Dmytro Mishkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Langerman_2025_ICCV, author = {Langerman, Jack and Rozumnyi, Denys and Huang, Yuzhong and Mishkin, Dmytro}, title = {Explaining Human Preferences via Metrics for Structured 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26944-26953} }
LOCATEdit: Graph Laplacian Optimized Cross Attention for Localized Text-Guided Image Editing: Achint Soni,

Meet Soni,

Sirisha Rambhatla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soni_2025_ICCV, author = {Soni, Achint and Soni, Meet and Rambhatla, Sirisha}, title = {LOCATEdit: Graph Laplacian Optimized Cross Attention for Localized Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18804-18814} }
VistaDream: Sampling multiview consistent images for single-view scene reconstruction: Haiping Wang,

Yuan Liu,

Ziwei Liu,

Wenping Wang,

Zhen Dong,

Bisheng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Haiping and Liu, Yuan and Liu, Ziwei and Wang, Wenping and Dong, Zhen and Yang, Bisheng}, title = {VistaDream: Sampling multiview consistent images for single-view scene reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26772-26782} }
LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion: Fangfu Liu,

Hao Li,

Jiawei Chi,

Hanyang Wang,

Minghui Yang,

Fudong Wang,

Yueqi Duan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Fangfu and Li, Hao and Chi, Jiawei and Wang, Hanyang and Yang, Minghui and Wang, Fudong and Duan, Yueqi}, title = {LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29010-29020} }
Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation: Akshay Krishnan,

Xinchen Yan,

Vincent Casser,

Abhijit Kundu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnan_2025_ICCV, author = {Krishnan, Akshay and Yan, Xinchen and Casser, Vincent and Kundu, Abhijit}, title = {Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28217-28227} }
MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network: Jianfei Jiang,

Qiankun Liu,

Haochen Yu,

Hongyuan Liu,

Liyong Wang,

Jiansheng Chen,

Huimin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV, author = {Jiang, Jianfei and Liu, Qiankun and Yu, Haochen and Liu, Hongyuan and Wang, Liyong and Chen, Jiansheng and Ma, Huimin}, title = {MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27806-27816} }
Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View: Zitong Zhang,

Suranjan Gautam,

Rui Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zitong and Gautam, Suranjan and Yu, Rui}, title = {Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28493-28502} }
Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion: Zeyu Wang,

Jizheng Zhang,

Haiyu Song,

Mingyu Ge,

Jiayu Wang,

Haoran Duan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Zeyu and Zhang, Jizheng and Song, Haiyu and Ge, Mingyu and Wang, Jiayu and Duan, Haoran}, title = {Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12637-12647} }
Unbiased Missing-modality Multimodal Learning: Ruiting Dai,

Chenxi Li,

Yandong Yan,

Lisi Mo,

Ke Qin,

Tao He; [pdf]
[bibtex]
@InProceedings{Dai_2025_ICCV, author = {Dai, Ruiting and Li, Chenxi and Yan, Yandong and Mo, Lisi and Qin, Ke and He, Tao}, title = {Unbiased Missing-modality Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24507-24517} }
You Think, You ACT: The New Task of Arbitrary Text to Motion Generation: Runqi Wang,

Caoyuan Ma,

Guopeng Li,

Hanrui Xu,

Yuke Li,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Runqi and Ma, Caoyuan and Li, Guopeng and Xu, Hanrui and Li, Yuke and Wang, Zheng}, title = {You Think, You ACT: The New Task of Arbitrary Text to Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12012-12022} }
Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories: Yicong Li,

Yiyang Chen,

Zhenyuan Ma,

Junbin Xiao,

Xiang Wang,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Yicong and Chen, Yiyang and Ma, Zhenyuan and Xiao, Junbin and Wang, Xiang and Yao, Angela}, title = {Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22836-22845} }
FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers: Renshan Zhang,

Rui Shao,

Gongwei Chen,

Miao Zhang,

Kaiwen Zhou,

Weili Guan,

Liqiang Nie; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Renshan and Shao, Rui and Chen, Gongwei and Zhang, Miao and Zhou, Kaiwen and Guan, Weili and Nie, Liqiang}, title = {FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23530-23540} }
Verbalized Representation Learning for Interpretable Few-Shot Generalization: Cheng-Fu Yang,

Da Yin,

Wenbo Hu,

Heng Ji,

Nanyun Peng,

Bolei Zhou,

Kai-Wei Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV, author = {Yang, Cheng-Fu and Yin, Da and Hu, Wenbo and Ji, Heng and Peng, Nanyun and Zhou, Bolei and Chang, Kai-Wei}, title = {Verbalized Representation Learning for Interpretable Few-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1602-1612} }
HERO: Human Reaction Generation from Videos: Chengjun Yu,

Wei Zhai,

Yuhang Yang,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV, author = {Yu, Chengjun and Zhai, Wei and Yang, Yuhang and Cao, Yang and Zha, Zheng-Jun}, title = {HERO: Human Reaction Generation from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10262-10274} }
Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints: Zhenxing Dong,

Jiazhou Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV, author = {Dong, Zhenxing and Chen, Jiazhou}, title = {Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25145-25154} }
Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration: Darshan Thaker,

Abhishek Goyal,

Rene Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thaker_2025_ICCV, author = {Thaker, Darshan and Goyal, Abhishek and Vidal, Rene}, title = {Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12873-12882} }
Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search: Shuyu Yang,

Yaxiong Wang,

Li Zhu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV, author = {Yang, Shuyu and Wang, Yaxiong and Zhu, Li and Zheng, Zhedong}, title = {Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11720-11730} }
GigaTok: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation: Tianwei Xiong,

Jun Hao Liew,

Zilong Huang,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_ICCV, author = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Feng, Jiashi and Liu, Xihui}, title = {GigaTok: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18770-18780} }
Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency: Yuxin Cheng,

Binxiao Huang,

Taiqiang Wu,

Wenyong Zhou,

Chenchen Ding,

Zhengwu Liu,

Graziano Chesi,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV, author = {Cheng, Yuxin and Huang, Binxiao and Wu, Taiqiang and Zhou, Wenyong and Ding, Chenchen and Liu, Zhengwu and Chesi, Graziano and Wong, Ngai}, title = {Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28503-28513} }
DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model: Rui Yu,

Xianghang Zhang,

Runkai Zhao,

Huaicheng Yan,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV, author = {Yu, Rui and Zhang, Xianghang and Zhao, Runkai and Yan, Huaicheng and Wang, Meng}, title = {DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26188-26197} }
TESPEC: Temporally-Enhanced Self-Supervised Pretraining for Event Cameras: Mohammad Mohammadi,

Ziyi Wu,

Igor Gilitschenski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mohammadi_2025_ICCV, author = {Mohammadi, Mohammad and Wu, Ziyi and Gilitschenski, Igor}, title = {TESPEC: Temporally-Enhanced Self-Supervised Pretraining for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7782-7793} }
Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation: Soumyadipta Banerjee,

Jiaul H. Paik,

Debashis Sen; [pdf] [supp]
[bibtex]
@InProceedings{Banerjee_2025_ICCV, author = {Banerjee, Soumyadipta and Paik, Jiaul H. and Sen, Debashis}, title = {Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29001-29009} }
FullDiT: Video Generative Foundation Models with Multimodal Control via Full Attention: Xuan Ju,

Weicai Ye,

Quande Liu,

Qiulin Wang,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Kun Gai,

Qiang Xu; [pdf]
[bibtex]
@InProceedings{Ju_2025_ICCV, author = {Ju, Xuan and Ye, Weicai and Liu, Quande and Wang, Qiulin and Wang, Xintao and Wan, Pengfei and Zhang, Di and Gai, Kun and Xu, Qiang}, title = {FullDiT: Video Generative Foundation Models with Multimodal Control via Full Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15737-15747} }
Feed-Forward SceneDINO for Unsupervised Semantic Scene Completion: Aleksandar Jevtić,

Christoph Reich,

Felix Wimbauer,

Oliver Hahn,

Christian Rupprecht,

Stefan Roth,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Jevtic_2025_ICCV, author = {Jevti\'c, Aleksandar and Reich, Christoph and Wimbauer, Felix and Hahn, Oliver and Rupprecht, Christian and Roth, Stefan and Cremers, Daniel}, title = {Feed-Forward SceneDINO for Unsupervised Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6784-6796} }
V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception: Bowen Wang,

Yafei Wang,

Wei Gong,

Siheng Chen,

Genjia Liu,

Minhao Xiong,

Chin Long Ng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Bowen and Wang, Yafei and Gong, Wei and Chen, Siheng and Liu, Genjia and Xiong, Minhao and Ng, Chin Long}, title = {V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28385-28395} }
VideoAuteur: Towards Long Narrative Video Generation: Junfei Xiao,

Feng Cheng,

Lu Qi,

Liangke Gui,

Yang Zhao,

Shanchuan Lin,

Jiepeng Cen,

Zhibei Ma,

Alan Yuille,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV, author = {Xiao, Junfei and Cheng, Feng and Qi, Lu and Gui, Liangke and Zhao, Yang and Lin, Shanchuan and Cen, Jiepeng and Ma, Zhibei and Yuille, Alan and Jiang, Lu}, title = {VideoAuteur: Towards Long Narrative Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19163-19173} }
Is CLIP ideal? No. Can we fix it? Yes!: Raphi Kang,

Yue Song,

Georgia Gkioxari,

Pietro Perona; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Raphi and Song, Yue and Gkioxari, Georgia and Perona, Pietro},
  title     = {Is {CLIP} ideal? {No}. {Can} we fix it? {Yes}!},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22436--22446},
}
LaneDiffusion: Improving Centerline Graph Learning via Prior Injected BEV Feature Generation: Zijie Wang,

Weiming Zhang,

Wei Zhang,

Xiao Tan,

Hongxing Liu,

Yaowei Wang,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zijie and Zhang, Weiming and Zhang, Wei and Tan, Xiao and Liu, Hongxing and Wang, Yaowei and Li, Guanbin},
  title     = {{LaneDiffusion}: Improving Centerline Graph Learning via Prior Injected {BEV} Feature Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27052--27062},
}
Federated Domain Generalization with Domain-specific Soft Prompts Generation: Jianhan Wu,

Xiaoyang Qu,

Zhangcheng Huang,

Jianzong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jianhan and Qu, Xiaoyang and Huang, Zhangcheng and Wang, Jianzong},
  title     = {Federated Domain Generalization with Domain-specific Soft Prompts Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2366--2375},
}
Multi-modal Identity Extraction: Ryan Webster,

Teddy Furon; [pdf] [supp]
[bibtex]
@InProceedings{Webster_2025_ICCV,
  author    = {Webster, Ryan and Furon, Teddy},
  title     = {Multi-modal Identity Extraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10797--10806},
}
Height-Fidelity Dense Global Fusion for Multi-modal 3D Object Detection: Hanshi Wang,

Jin Gao,

Weiming Hu,

Zhipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hanshi and Gao, Jin and Hu, Weiming and Zhang, Zhipeng},
  title     = {Height-Fidelity Dense Global Fusion for Multi-modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26664--26674},
}
DIH-CLIP: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation: Songsong Duan,

Xi Yang,

Nannan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Songsong and Yang, Xi and Wang, Nannan},
  title     = {{DIH-CLIP}: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22794--22803},
}
ExploreGS: Explorable 3D Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors: Minsu Kim,

Subin Jeon,

In Cho,

Mijin Yoo,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Minsu and Jeon, Subin and Cho, In and Yoo, Mijin and Kim, Seon Joo},
  title     = {{ExploreGS}: Explorable {3D} Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27042--27051},
}
Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval: WonJun Moon,

Cheol-Ho Cho,

Woojin Jun,

Taeoh Kim,

Inwoong Lee,

Dongyoon Wee,

Minho Shim,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, WonJun and Cho, Cheol-Ho and Jun, Woojin and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Shim, Minho and Heo, Jae-Pil},
  title     = {Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21789--21799},
}
Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection: Yingsong Huang,

Hui Guo,

Jing Huang,

Bing Bai,

Qi Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yingsong and Guo, Hui and Huang, Jing and Bai, Bing and Xiong, Qi},
  title     = {Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17097--17107},
}
SpiLiFormer: Enhancing Spiking Transformers with Lateral Inhibition: Zeqi Zheng,

Yanchen Huang,

Yingchao Yu,

Zizheng Zhu,

Junfeng Tang,

Zhaofei Yu,

Yaochu Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zeqi and Huang, Yanchen and Yu, Yingchao and Zhu, Zizheng and Tang, Junfeng and Yu, Zhaofei and Jin, Yaochu},
  title     = {{SpiLiFormer}: Enhancing Spiking Transformers with Lateral Inhibition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24539--24548},
}
PS-Mamba: Spatial-Temporal Graph Mamba for Pose Sequence Refinement: Haoye Dong,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Haoye and Lee, Gim Hee},
  title     = {{PS-Mamba}: Spatial-Temporal Graph {Mamba} for Pose Sequence Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8568--8578},
}
Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models: Xuran Ma,

Yexin Liu,

Yaofu Liu,

Xianfeng Wu,

Mingzhe Zheng,

Zihao Wang,

Ser-Nam Lim,

Harry Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Xuran and Liu, Yexin and Liu, Yaofu and Wu, Xianfeng and Zheng, Mingzhe and Wang, Zihao and Lim, Ser-Nam and Yang, Harry},
  title     = {Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17150--17159},
}
IRASim: A Fine-Grained World Model for Robot Manipulation: Fangqi Zhu,

Hongtao Wu,

Song Guo,

Yuxiao Liu,

Chilam Cheang,

Tao Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Fangqi and Wu, Hongtao and Guo, Song and Liu, Yuxiao and Cheang, Chilam and Kong, Tao},
  title     = {{IRASim}: A Fine-Grained World Model for Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9834--9844},
}
Diffusion-Based Imaginative Coordination for Bimanual Manipulation: Huilin Xu,

Jian Ding,

Jiakun Xu,

Ruixiang Wang,

Jun Chen,

Jinjie Mai,

Yanwei Fu,

Bernard Ghanem,

Feng Xu,

Mohamed Elhoseiny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Huilin and Ding, Jian and Xu, Jiakun and Wang, Ruixiang and Chen, Jun and Mai, Jinjie and Fu, Yanwei and Ghanem, Bernard and Xu, Feng and Elhoseiny, Mohamed},
  title     = {Diffusion-Based Imaginative Coordination for Bimanual Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11469--11479},
}
Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding: Ta Duc Huy,

Duy Anh Huynh,

Yutong Xie,

Yuankai Qi,

Qi Chen,

Phi Le Nguyen,

Sen Kim Tran,

Son Lam Phung,

Anton van den Hengel,

Zhibin Liao,

Minh-Son To,

Johan W. Verjans,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huy_2025_ICCV,
  author    = {Huy, Ta Duc and Huynh, Duy Anh and Xie, Yutong and Qi, Yuankai and Chen, Qi and Le Nguyen, Phi and Tran, Sen Kim and Phung, Son Lam and van den Hengel, Anton and Liao, Zhibin and To, Minh-Son and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24445--24455},
}
Dataset Distillation as Data Compression: A Rate-Utility Perspective: Youneng Bao,

Yiping Liu,

Zhuo Chen,

Yongsheng Liang,

Mu Li,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Youneng and Liu, Yiping and Chen, Zhuo and Liang, Yongsheng and Li, Mu and Ma, Kede},
  title     = {Dataset Distillation as Data Compression: A Rate-Utility Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {519--529},
}
GroundingSuite: Measuring Complex Multi-Granular Pixel Grounding: Rui Hu,

Lianghui Zhu,

Yuxuan Zhang,

Tianheng Cheng,

Lei Liu,

Heng Liu,

Longjin Ran,

Xiaoxin Chen,

Wenyu Liu,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Rui and Zhu, Lianghui and Zhang, Yuxuan and Cheng, Tianheng and Liu, Lei and Liu, Heng and Ran, Longjin and Chen, Xiaoxin and Liu, Wenyu and Wang, Xinggang},
  title     = {{GroundingSuite}: Measuring Complex Multi-Granular Pixel Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23105--23114},
}
Consensus-Driven Active Model Selection: Justin Kay,

Grant Van Horn,

Subhransu Maji,

Daniel Sheldon,

Sara Beery; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kay_2025_ICCV,
  author    = {Kay, Justin and Van Horn, Grant and Maji, Subhransu and Sheldon, Daniel and Beery, Sara},
  title     = {Consensus-Driven Active Model Selection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4594--4604},
}
Learning an Implicit Physics Model for Image-based Fluid Simulation: Emily Yue-Ting Jia,

Jiageng Mao,

Zhiyuan Gao,

Yajie Zhao,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Emily Yue-Ting and Mao, Jiageng and Gao, Zhiyuan and Zhao, Yajie and Wang, Yue},
  title     = {Learning an Implicit Physics Model for Image-based Fluid Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7048--7057},
}
Task-Decoupled Bezier Surface Constraint for Uneven Low-Light Image Enhancement: Xingxiang Zhou,

Xiangdong Su,

Haoran Zhang,

Wei Chen,

Guanglai Gao; [pdf]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Xingxiang and Su, Xiangdong and Zhang, Haoran and Chen, Wei and Gao, Guanglai},
  title     = {Task-Decoupled {Bezier} Surface Constraint for Uneven Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6859--6868},
}
VideoRFSplat: Direct Scene-Level Text-to-3D Gaussian Splatting Generation with Flexible Pose and Multi-View Joint Modeling: Hyojun Go,

Byeongjun Park,

Hyelin Nam,

Byung-Hoon Kim,

Hyungjin Chung,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Go_2025_ICCV,
  author    = {Go, Hyojun and Park, Byeongjun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick},
  title     = {{VideoRFSplat}: Direct Scene-Level Text-to-{3D} {Gaussian} Splatting Generation with Flexible Pose and Multi-View Joint Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26706--26717},
}
PRISM: Reducing Spurious Implicit Biases in Vision-Language Models with LLM-Guided Embedding Projection: Mahdiyar Molahasani,

Azadeh Motamedi,

Michael Greenspan,

Il-Min Kim,

Ali Etemad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Molahasani_2025_ICCV,
  author    = {Molahasani, Mahdiyar and Motamedi, Azadeh and Greenspan, Michael and Kim, Il-Min and Etemad, Ali},
  title     = {{PRISM}: Reducing Spurious Implicit Biases in Vision-Language Models with {LLM}-Guided Embedding Projection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {688--697},
}
Animate Anyone 2: High-Fidelity Character Image Animation with Environment Affordance: Li Hu,

Guangyuan Wang,

Zhen Shen,

Xin Gao,

Dechao Meng,

Lian Zhuo,

Peng Zhang,

Bang Zhang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Li and Wang, Guangyuan and Shen, Zhen and Gao, Xin and Meng, Dechao and Zhuo, Lian and Zhang, Peng and Zhang, Bang and Bo, Liefeng},
  title     = {{Animate Anyone} 2: High-Fidelity Character Image Animation with Environment Affordance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10207--10217},
}
DATA: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception: Chengchang Tian,

Jianwei Ma,

Yan Huang,

Zhanye Chen,

Honghao Wei,

Hui Zhang,

Wei Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Chengchang and Ma, Jianwei and Huang, Yan and Chen, Zhanye and Wei, Honghao and Zhang, Hui and Hong, Wei},
  title     = {{DATA}: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28643--28652},
}
LongAnimation: Long Animation Generation with Dynamic Global-Local Memory: Nan Chen,

Mengqi Huang,

Yihao Meng,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Nan and Huang, Mengqi and Meng, Yihao and Mao, Zhendong},
  title     = {{LongAnimation}: Long Animation Generation with Dynamic Global-Local Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10032--10042},
}
Is Tracking Really More Challenging in First Person Egocentric Vision?: Matteo Dunnhofer,

Zaira Manigrasso,

Christian Micheloni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dunnhofer_2025_ICCV,
  author    = {Dunnhofer, Matteo and Manigrasso, Zaira and Micheloni, Christian},
  title     = {Is Tracking Really More Challenging in First Person Egocentric Vision?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5879--5889},
}
Bringing RNNs Back to Efficient Open-Ended Video Understanding: Weili Xu,

Enxin Song,

Wenhao Chai,

Xuexiang Wen,

Tian Ye,

Gaoang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Weili and Song, Enxin and Chai, Wenhao and Wen, Xuexiang and Ye, Tian and Wang, Gaoang},
  title     = {Bringing {RNNs} Back to Efficient Open-Ended Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23453--23465},
}
ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba: Juncan Deng,

Shuaiting Li,

Zeyu Wang,

Kedong Xu,

Hong Gu,

Kejie Huang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Juncan and Li, Shuaiting and Wang, Zeyu and Xu, Kedong and Gu, Hong and Huang, Kejie},
  title     = {{ViM-VQ}: Efficient Post-Training Vector Quantization for Visual {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24518--24527},
}
TorchAdapt: Towards Light-Agnostic Real-Time Visual Perception: Khurram Azeem Hashmi,

Karthik Palyakere Suresh,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
@InProceedings{Hashmi_2025_ICCV,
  author    = {Hashmi, Khurram Azeem and Suresh, Karthik Palyakere and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{TorchAdapt}: Towards Light-Agnostic Real-Time Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5645--5656},
}
UniversalBooth: Model-Agnostic Personalized Text-to-Image Generation: Songhua Liu,

Ruonan Yu,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Songhua and Yu, Ruonan and Wang, Xinchao},
  title     = {{UniversalBooth}: Model-Agnostic Personalized Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18314--18324},
}
EVEv2: Improved Baselines for Encoder-Free Vision-Language Models: Haiwen Diao,

Xiaotong Li,

Yufeng Cui,

Yueze Wang,

Haoge Deng,

Ting Pan,

Wenxuan Wang,

Huchuan Lu,

Xinlong Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Diao_2025_ICCV,
  author    = {Diao, Haiwen and Li, Xiaotong and Cui, Yufeng and Wang, Yueze and Deng, Haoge and Pan, Ting and Wang, Wenxuan and Lu, Huchuan and Wang, Xinlong},
  title     = {{EVEv2}: Improved Baselines for Encoder-Free Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21014--21025},
}
End-to-End Driving with Online Trajectory Evaluation via BEV World Model: Yingyan Li,

Yuqi Wang,

Yang Liu,

Jiawei He,

Lue Fan,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yingyan and Wang, Yuqi and Liu, Yang and He, Jiawei and Fan, Lue and Zhang, Zhaoxiang},
  title     = {End-to-End Driving with Online Trajectory Evaluation via {BEV} World Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27137--27146},
}
EEGMirror: Leveraging EEG Data in the Wild via Montage-Agnostic Self-Supervision for EEG to Video Decoding: Xuan-Hao Liu,

Bao-Liang Lu,

Wei-Long Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xuan-Hao and Lu, Bao-Liang and Zheng, Wei-Long},
  title     = {{EEGMirror}: Leveraging {EEG} Data in the Wild via Montage-Agnostic Self-Supervision for {EEG} to Video Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18273--18283},
}
Edit360: 2D Image Edits to 3D Assets from Any Angle: Junchao Huang,

Xinting Hu,

Shaoshuai Shi,

Zhuotao Tian,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junchao and Hu, Xinting and Shi, Shaoshuai and Tian, Zhuotao and Jiang, Li},
  title     = {Edit360: {2D} Image Edits to {3D} Assets from Any Angle},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16618--16628},
}
VehicleMAE: View-asymmetry Mutual Learning for Vehicle Re-identification Pre-training via Masked AutoEncoders: Qi Wang,

Zeyu Zhang,

Dong Wang,

Di Gai,

Xin Xiong,

Jiyang Xu,

Ruihua Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qi and Zhang, Zeyu and Wang, Dong and Gai, Di and Xiong, Xin and Xu, Jiyang and Zhou, Ruihua},
  title     = {{VehicleMAE}: View-asymmetry Mutual Learning for Vehicle Re-identification Pre-training via Masked {AutoEncoders}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4701--4711},
}
MiDSummer: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive Gaussian Splatting Scene Generation: Anjun Hu,

Richard Tomsett,

Valentin Gourmet,

Massimo Camplani,

Jas Kandola,

Hanting Xie; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Anjun and Tomsett, Richard and Gourmet, Valentin and Camplani, Massimo and Kandola, Jas and Xie, Hanting},
  title     = {{MiDSummer}: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive {Gaussian} Splatting Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26793--26805},
}
AV-Link: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation: Moayed Haji-Ali,

Willi Menapace,

Aliaksandr Siarohin,

Ivan Skorokhodov,

Alper Canberk,

Kwot Sin Lee,

Vicente Ordonez,

Sergey Tulyakov; [pdf] [supp]
[bibtex]
@InProceedings{Haji-Ali_2025_ICCV,
  author    = {Haji-Ali, Moayed and Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Canberk, Alper and Lee, Kwot Sin and Ordonez, Vicente and Tulyakov, Sergey},
  title     = {{AV-Link}: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19373--19385},
}
RANKCLIP: Ranking-Consistent Language-Image Pretraining: Yiming Zhang,

Zhuokai Zhao,

Zhaorun Chen,

Zhili Feng,

Zenghui Ding,

Yining Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Feng, Zhili and Ding, Zenghui and Sun, Yining},
  title     = {{RANKCLIP}: Ranking-Consistent Language-Image Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3874--3884},
}
Is Visual in-Context Learning for Compositional Medical Tasks within Reach?: Simon Reiß,

Zdravko Marinov,

Alexander Jaus,

Constantin Seibold,

M. Saquib Sarfraz,

Erik Rodner,

Rainer Stiefelhagen; [pdf] [supp]
[bibtex]
@InProceedings{Reiss_2025_ICCV,
  author    = {Rei{\ss}, Simon and Marinov, Zdravko and Jaus, Alexander and Seibold, Constantin and Sarfraz, M. Saquib and Rodner, Erik and Stiefelhagen, Rainer},
  title     = {Is Visual in-Context Learning for Compositional Medical Tasks within Reach?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2642--2652},
}
Details Matter for Indoor Open-vocabulary 3D Instance Segmentation: Sanghun Jung,

Jingjing Zheng,

Ke Zhang,

Nan Qiao,

Albert Y. C. Chen,

Lu Xia,

Chi Liu,

Yuyin Sun,

Xiao Zeng,

Hsiang-Wei Huang,

Byron Boots,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Sanghun and Zheng, Jingjing and Zhang, Ke and Qiao, Nan and Chen, Albert Y. C. and Xia, Lu and Liu, Chi and Sun, Yuyin and Zeng, Xiao and Huang, Hsiang-Wei and Boots, Byron and Sun, Min and Kuo, Cheng-Hao},
  title     = {Details Matter for Indoor Open-vocabulary {3D} Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9627--9637},
}
Differentially Private Fine-Tuning of Diffusion Models: Yu-Lin Tsai,

Yizhe Li,

Chia-Mu Yu,

Xuebin Ren,

Po-Yu Chen,

Zekai Chen,

Francois Buet-Golfouse; [pdf] [arXiv]
[bibtex]
@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Yu-Lin and Li, Yizhe and Yu, Chia-Mu and Ren, Xuebin and Chen, Po-Yu and Chen, Zekai and Buet-Golfouse, Francois},
  title     = {Differentially Private Fine-Tuning of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4561--4571},
}
Large Learning Rates Simultaneously Achieve Robustness to Spurious Correlations and Compressibility: Melih Barsbey,

Lucas Prieto,

Stefanos Zafeiriou,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barsbey_2025_ICCV,
  author    = {Barsbey, Melih and Prieto, Lucas and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {Large Learning Rates Simultaneously Achieve Robustness to Spurious Correlations and Compressibility},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2055--2066},
}
Global Regulation and Excitation via Attention Tuning for Stereo Matching: Jiahao Li,

Xinhong Chen,

Zhengmin Jiang,

Qian Zhou,

Yung-Hui Li,

Jianping Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiahao and Chen, Xinhong and Jiang, Zhengmin and Zhou, Qian and Li, Yung-Hui and Wang, Jianping},
  title     = {Global Regulation and Excitation via Attention Tuning for Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25539--25549},
}
TruthPrInt: Mitigating Large Vision-Language Models Object Hallucination Via Latent Truthful-Guided Pre-Intervention: Jinhao Duan,

Fei Kong,

Hao Cheng,

James Diffenderfer,

Bhavya Kailkhura,

Lichao Sun,

Xiaofeng Zhu,

Xiaoshuang Shi,

Kaidi Xu; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Jinhao and Kong, Fei and Cheng, Hao and Diffenderfer, James and Kailkhura, Bhavya and Sun, Lichao and Zhu, Xiaofeng and Shi, Xiaoshuang and Xu, Kaidi},
  title     = {{TruthPrInt}: Mitigating Large Vision-Language Models Object Hallucination Via Latent Truthful-Guided Pre-Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7372--7382},
}
Customizing Domain Adapters for Domain Generalization: Yuyang Ji,

Zeyi Huang,

Haohan Wang,

Yong Jae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yuyang and Huang, Zeyi and Wang, Haohan and Lee, Yong Jae},
  title     = {Customizing Domain Adapters for Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {934--944},
}
ADCD-Net: Robust Document Image Forgery Localization via Adaptive DCT Feature and Hierarchical Content Disentanglement: Kahim Wong,

Jicheng Zhou,

Haiwei Wu,

Yain-Whar Si,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Kahim and Zhou, Jicheng and Wu, Haiwei and Si, Yain-Whar and Zhou, Jiantao},
  title     = {{ADCD-Net}: Robust Document Image Forgery Localization via Adaptive {DCT} Feature and Hierarchical Content Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19280--19289},
}
STaR: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints: Xiaohang Yang,

Qing Wang,

Jiahao Yang,

Gregory Slabaugh,

Shanxin Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xiaohang and Wang, Qing and Yang, Jiahao and Slabaugh, Gregory and Yuan, Shanxin},
  title     = {{STaR}: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12947--12955},
}
Structured Policy Optimization: Enhance Large Vision-Language Model via Self-referenced Dialogue: Guohao Sun,

Can Qin,

Yihao Feng,

Zeyuan Chen,

Ran Xu,

Sohail Dianat,

Majid Rabbani,

Raghuveer Rao,

Zhiqiang Tao; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Guohao and Qin, Can and Feng, Yihao and Chen, Zeyuan and Xu, Ran and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang},
  title     = {Structured Policy Optimization: Enhance Large Vision-Language Model via Self-referenced Dialogue},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {741--751},
}
UnrealZoo: Enriching Photo-realistic Virtual Worlds for Embodied AI: Fangwei Zhong,

Kui Wu,

Churan Wang,

Hao Chen,

Hai Ci,

Zhoujun Li,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Fangwei and Wu, Kui and Wang, Churan and Chen, Hao and Ci, Hai and Li, Zhoujun and Wang, Yizhou},
  title     = {{UnrealZoo}: Enriching Photo-realistic Virtual Worlds for Embodied {AI}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5769--5779},
}
Visual Test-time Scaling for GUI Agent Grounding: Tiange Luo,

Lajanugen Logeswaran,

Justin Johnson,

Honglak Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Tiange and Logeswaran, Lajanugen and Johnson, Justin and Lee, Honglak},
  title     = {Visual Test-time Scaling for {GUI} Agent Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19989--19998},
}
One-Step Specular Highlight Removal with Adapted Diffusion Models: Mahir Atmis,

Levent Karacan,

Mehmet Sarıgül; [pdf] [supp]
[bibtex]
@InProceedings{Atmis_2025_ICCV,
  author    = {Atmis, Mahir and Karacan, Levent and Sar{\i}g{\"u}l, Mehmet},
  title     = {One-Step Specular Highlight Removal with Adapted Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16313--16322},
}
GraspCoT: Integrating Physical Property Reasoning for 6-DoF Grasping under Flexible Language Instructions: Xiaomeng Chu,

Jiajun Deng,

Guoliang You,

Wei Liu,

Xingchen Li,

Jianmin Ji,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Liu, Wei and Li, Xingchen and Ji, Jianmin and Zhang, Yanyong},
  title     = {{GraspCoT}: Integrating Physical Property Reasoning for {6-DoF} Grasping under Flexible Language Instructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10130--10140},
}
Deep Space Weather Model: Long-Range Solar Flare Prediction from Multi-Wavelength Images: Shunya Nagashima,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagashima_2025_ICCV,
  author    = {Nagashima, Shunya and Sugiura, Komei},
  title     = {Deep Space Weather Model: Long-Range Solar Flare Prediction from Multi-Wavelength Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9396--9405},
}
Hydra-NeXt: Robust Closed-Loop Driving with Open-Loop Training: Zhenxin Li,

Shihao Wang,

Shiyi Lan,

Zhiding Yu,

Zuxuan Wu,

Jose M. Alvarez; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhenxin and Wang, Shihao and Lan, Shiyi and Yu, Zhiding and Wu, Zuxuan and Alvarez, Jose M.},
  title     = {{Hydra-NeXt}: Robust Closed-Loop Driving with Open-Loop Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27305--27314},
}
Zero-Shot Vision Encoder Grafting via LLM Surrogates: Kaiyu Yue,

Vasu Singla,

Menglin Jia,

John Kirchenbauer,

Rifaa Qadri,

Zikui Cai,

Abhinav Bhatele,

Furong Huang,

Tom Goldstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_ICCV,
  author    = {Yue, Kaiyu and Singla, Vasu and Jia, Menglin and Kirchenbauer, John and Qadri, Rifaa and Cai, Zikui and Bhatele, Abhinav and Huang, Furong and Goldstein, Tom},
  title     = {Zero-Shot Vision Encoder Grafting via {LLM} Surrogates},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4275--4284},
}
Depth Any Event Stream: Enhancing Event-based Monocular Depth Estimation via Dense-to-Sparse Distillation: Jinjing Zhu,

Tianbo Pan,

Zidong Cao,

Yexin Liu,

James T. Kwok,

Hui Xiong; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jinjing and Pan, Tianbo and Cao, Zidong and Liu, Yexin and Kwok, James T. and Xiong, Hui},
  title     = {Depth Any Event Stream: Enhancing Event-based Monocular Depth Estimation via Dense-to-Sparse Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5146--5155},
}
Uncover Treasures in DCT: Advancing JPEG Quality Enhancement by Exploiting Latent Correlations: Jing Yang,

Qunliang Xing,

Mai Xu,

Minglang Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jing and Xing, Qunliang and Xu, Mai and Qiao, Minglang},
  title     = {Uncover Treasures in {DCT}: Advancing {JPEG} Quality Enhancement by Exploiting Latent Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17598--17607},
}
Layer-wise Vision Injection with Disentangled Attention for Efficient LVLMs: Xuange Zhang,

Dengjie Li,

Bo Liu,

Zenghao Bao,

Yao Zhou,

Baisong Yang,

Zhongying Liu,

Yujie Zhong,

Tongtong Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xuange and Li, Dengjie and Liu, Bo and Bao, Zenghao and Zhou, Yao and Yang, Baisong and Liu, Zhongying and Zhong, Yujie and Yuan, Tongtong},
  title     = {Layer-wise Vision Injection with Disentangled Attention for Efficient {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7004--7013},
}
Heatmap Regression without Soft-Argmax for Facial Landmark Detection: Chiao-An Yang,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Yeh, Raymond A.},
  title     = {Heatmap Regression without Soft-Argmax for Facial Landmark Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28729--28739},
}
ICE-Bench: A Unified and Comprehensive Benchmark for Image Creating and Editing: Yulin Pan,

Xiangteng He,

Chaojie Mao,

Zhen Han,

Zeyinzi Jiang,

Jingfeng Zhang,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Yulin and He, Xiangteng and Mao, Chaojie and Han, Zhen and Jiang, Zeyinzi and Zhang, Jingfeng and Liu, Yu},
  title     = {{ICE-Bench}: A Unified and Comprehensive Benchmark for Image Creating and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16586--16596},
}
Unified Adversarial Augmentation for Improving Palmprint Recognition: Jianlong Jin,

Chenglong Zhao,

Ruixin Zhang,

Sheng Shang,

Yang Zhao,

Jun Wang,

Jingyun Zhang,

Shouhong Ding,

Wei Jia,

Yunsheng Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Zhao, Yang and Wang, Jun and Zhang, Jingyun and Ding, Shouhong and Jia, Wei and Wu, Yunsheng},
  title     = {Unified Adversarial Augmentation for Improving Palmprint Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14141--14151},
}
PlugMark: A Plug-in Zero-Watermarking Framework for Diffusion Models: Pengzhen Chen,

Yanwei Liu,

Xiaoyan Gu,

Enci Liu,

Zhuoyi Shang,

Xiangyang Ji,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Liu, Enci and Shang, Zhuoyi and Ji, Xiangyang and Liu, Wu},
  title     = {{PlugMark}: A Plug-in Zero-Watermarking Framework for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17335--17345},
}
FLSeg: Enhancing Privacy and Robustness in Federated Learning under Heterogeneous Data via Model Segmentation: Zichun Su,

Zhi Lu,

Yutong Wu,

Renfei Shen,

Songfeng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Zichun and Lu, Zhi and Wu, Yutong and Shen, Renfei and Lu, Songfeng},
  title     = {{FLSeg}: Enhancing Privacy and Robustness in Federated Learning under Heterogeneous Data via Model Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3916--3925},
}
Partially Matching Submap Helps: Uncertainty Modeling and Propagation for Text to Point Cloud Localization: Mingtao Feng,

Longlong Mei,

Zijie Wu,

Jianqiao Luo,

Fenghao Tian,

Jie Feng,

Weisheng Dong,

Yaonan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Mingtao and Mei, Longlong and Wu, Zijie and Luo, Jianqiao and Tian, Fenghao and Feng, Jie and Dong, Weisheng and Wang, Yaonan},
  title     = {Partially Matching Submap Helps: Uncertainty Modeling and Propagation for Text to Point Cloud Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8296--8305},
}
Human-Object Interaction from Human-Level Instructions: Zhen Wu,

Jiaman Li,

Pei Xu,

C. Karen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhen and Li, Jiaman and Xu, Pei and Liu, C. Karen},
  title     = {Human-Object Interaction from Human-Level Instructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11176--11186},
}
SAM2Long: Enhancing SAM 2 for Long Video Segmentation with a Training-Free Memory Tree: Shuangrui Ding,

Rui Qian,

Xiaoyi Dong,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Yuwei Guo,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Shuangrui and Qian, Rui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Guo, Yuwei and Lin, Dahua and Wang, Jiaqi},
  title     = {{SAM2Long}: Enhancing {SAM 2} for Long Video Segmentation with a Training-Free Memory Tree},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13614--13624},
}
Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision: Xiao Fang,

Minhyek Jeon,

Zheyang Qin,

Stanislav Panev,

Celso De Melo,

Shuowen Hu,

Shayok Chakraborty,

Fernando De La Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xiao and Jeon, Minhyek and Qin, Zheyang and Panev, Stanislav and De Melo, Celso and Hu, Shuowen and Chakraborty, Shayok and De La Torre, Fernando},
  title     = {Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8088--8099},
}
Neural Shell Texture Splatting: More Details and Fewer Primitives: Xin Zhang,

Anpei Chen,

Jincheng Xiong,

Pinxuan Dai,

Yujun Shen,

Weiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xin and Chen, Anpei and Xiong, Jincheng and Dai, Pinxuan and Shen, Yujun and Xu, Weiwei},
  title     = {Neural Shell Texture Splatting: More Details and Fewer Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25229--25238},
}
3DSRBench: A Comprehensive 3D Spatial Reasoning Benchmark: Wufei Ma,

Haoyu Chen,

Guofeng Zhang,

Yu-Cheng Chou,

Jieneng Chen,

Celso de Melo,

Alan Yuille; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Wufei and Chen, Haoyu and Zhang, Guofeng and Chou, Yu-Cheng and Chen, Jieneng and de Melo, Celso and Yuille, Alan},
  title     = {{3DSRBench}: A Comprehensive {3D} Spatial Reasoning Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6924--6934},
}
Causality-guided Prompt Learning for Vision-language Models via Visual Granulation: Mengyu Gao,

Qiulei Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Mengyu and Dong, Qiulei},
  title     = {Causality-guided Prompt Learning for Vision-language Models via Visual Granulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1141--1151},
}
Arti-PG: A Toolbox for Procedurally Synthesizing Large-Scale and Diverse Articulated Objects with Rich Annotations: Jianhua Sun,

Yuxuan Li,

Jiude Wei,

Longfei Xu,

Nange Wang,

Yining Zhang,

Cewu Lu; [pdf]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Jianhua and Li, Yuxuan and Wei, Jiude and Xu, Longfei and Wang, Nange and Zhang, Yining and Lu, Cewu},
  title     = {{Arti-PG}: A Toolbox for Procedurally Synthesizing Large-Scale and Diverse Articulated Objects with Rich Annotations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6396--6405},
}
X2I: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation: Jian Ma,

Qirong Peng,

Xu Guo,

Chen Chen,

Haonan Lu,

Zhenyu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Jian and Peng, Qirong and Guo, Xu and Chen, Chen and Lu, Haonan and Yang, Zhenyu},
  title     = {{X2I}: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16733--16744},
}
One Last Attention for Your Vision-Language Model: Liang Chen,

Ghazi Shazan Ahmad,

Tianjun Yao,

Lingqiao Liu,

Zhiqiang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Liang and Ahmad, Ghazi Shazan and Yao, Tianjun and Liu, Lingqiao and Shen, Zhiqiang},
  title     = {One Last Attention for Your Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1464--1473},
}
MMCR: Benchmarking Cross-Source Reasoning in Scientific Papers: Yang Tian,

Zheng Lu,

Mingqi Gao,

Zheng Liu,

Bo Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Yang and Lu, Zheng and Gao, Mingqi and Liu, Zheng and Zhao, Bo},
  title     = {{MMCR}: Benchmarking Cross-Source Reasoning in Scientific Papers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {488--497},
}
ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models: Bingchen Gong,

Diego Gomez,

Abdullah Hamdi,

Abdelrahman Eldesokey,

Ahmed Abdelreheem,

Peter Wonka,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Bingchen and Gomez, Diego and Hamdi, Abdullah and Eldesokey, Abdelrahman and Abdelreheem, Ahmed and Wonka, Peter and Ovsjanikov, Maks},
  title     = {{ZeroKey}: Point-Level Reasoning and Zero-Shot {3D} Keypoint Detection from Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22089--22099},
}
Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation: Fa-Ting Hong,

Zunnan Xu,

Zixiang Zhou,

Jun Zhou,

Xiu Li,

Qin Lin,

Qinglin Lu,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Fa-Ting and Xu, Zunnan and Zhou, Zixiang and Zhou, Jun and Li, Xiu and Lin, Qin and Lu, Qinglin and Xu, Dan},
  title     = {Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12549--12558},
}
DreamCube: RGB-D Panorama Generation via Multi-plane Synchronization: Yukun Huang,

Yanning Zhou,

Jianan Wang,

Kaiyi Huang,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yukun and Zhou, Yanning and Wang, Jianan and Huang, Kaiyi and Liu, Xihui},
  title     = {{DreamCube}: {RGB-D} Panorama Generation via Multi-plane Synchronization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24922--24932},
}
SILO: Solving Inverse Problems with Latent Operators: Ron Raphaeli,

Sean Man,

Michael Elad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raphaeli_2025_ICCV,
  author    = {Raphaeli, Ron and Man, Sean and Elad, Michael},
  title     = {{SILO}: Solving Inverse Problems with Latent Operators},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10570--10580},
}
Entropy-Adaptive Diffusion Policy Optimization with Dynamic Step Alignment: RenYe Yan,

Jikang Cheng,

Yaozhong Gan,

Shikun Sun,

You Wu,

Yunfan Yang,

Liang Ling,

Jinlong Lin,

Yeshuang Zhu,

Jie Zhou,

Jinchao Zhang,

Junliang Xing,

Yimao Cai,

Ru Huang; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, RenYe and Cheng, Jikang and Gan, Yaozhong and Sun, Shikun and Wu, You and Yang, Yunfan and Ling, Liang and Lin, Jinlong and Zhu, Yeshuang and Zhou, Jie and Zhang, Jinchao and Xing, Junliang and Cai, Yimao and Huang, Ru},
  title     = {Entropy-Adaptive Diffusion Policy Optimization with Dynamic Step Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1924--1934},
}
ScanEdit: Hierarchically-Guided Functional 3D Scan Editing: Mohamed El Amine Boudjoghra,

Ivan Laptev,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{El_Amine_Boudjoghra_2025_ICCV,
  author    = {El Amine Boudjoghra, Mohamed and Laptev, Ivan and Dai, Angela},
  title     = {{ScanEdit}: Hierarchically-Guided Functional {3D} Scan Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27105--27115},
}
DeFSS: Image-to-Mask Denoising Learning for Few-shot Segmentation: Zishu Qin,

Junhao Xu,

Weifeng Ge; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Zishu and Xu, Junhao and Ge, Weifeng},
  title     = {{DeFSS}: Image-to-Mask Denoising Learning for Few-shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22232--22240},
}
Bokehlicious: Photorealistic Bokeh Rendering with Controllable Apertures: Tim Seizinger,

Florin-Alexandru Vasluianu,

Marcos V. Conde,

Zongwei Wu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seizinger_2025_ICCV,
  author    = {Seizinger, Tim and Vasluianu, Florin-Alexandru and Conde, Marcos V. and Wu, Zongwei and Timofte, Radu},
  title     = {Bokehlicious: Photorealistic Bokeh Rendering with Controllable Apertures},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8908--8917},
}
Turbo2K: Towards Ultra-Efficient and High-Quality 2K Video Synthesis: Jingjing Ren,

Wenbo Li,

Zhongdao Wang,

Haoze Sun,

Bangzhen Liu,

Haoyu Chen,

Jiaqi Xu,

Aoxue Li,

Shifeng Zhang,

Bin Shao,

Yong Guo,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Jingjing and Li, Wenbo and Wang, Zhongdao and Sun, Haoze and Liu, Bangzhen and Chen, Haoyu and Xu, Jiaqi and Li, Aoxue and Zhang, Shifeng and Shao, Bin and Guo, Yong and Zhu, Lei},
  title     = {{Turbo2K}: Towards Ultra-Efficient and High-Quality {2K} Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18155--18165},
}
Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization: Hao Ju,

Shaofei Huang,

Si Liu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2025_ICCV,
  author    = {Ju, Hao and Huang, Shaofei and Liu, Si and Zheng, Zhedong},
  title     = {{Video2BEV}: Transforming Drone Videos to {BEVs} for Video-based Geo-localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27073--27083},
}
Puppet-Master: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics: Ruining Li,

Chuanxia Zheng,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruining and Zheng, Chuanxia and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{Puppet-Master}: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13405--13415},
}
MMAT-1M: A Large Reasoning Dataset for Multimodal Agent Tuning: Tianhong Gao,

Yannian Fu,

Weiqun Wu,

Haixiao Yue,

Shanshan Liu,

Gang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Tianhong and Fu, Yannian and Wu, Weiqun and Yue, Haixiao and Liu, Shanshan and Zhang, Gang},
  title     = {{MMAT-1M}: A Large Reasoning Dataset for Multimodal Agent Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1484--1494},
}
FlowChef: Steering of Rectified Flow Models for Controlled Generations: Maitreya Patel,

Song Wen,

Dimitris N. Metaxas,

Yezhou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Maitreya and Wen, Song and Metaxas, Dimitris N. and Yang, Yezhou},
  title     = {{FlowChef}: Steering of Rectified Flow Models for Controlled Generations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15308--15318},
}
Teaching VLMs to Localize Specific Objects from In-context Examples: Sivan Doveh,

Nimrod Shabtay,

Eli Schwartz,

Hilde Kuehne,

Raja Giryes,

Rogerio Feris,

Leonid Karlinsky,

James Glass,

Assaf Arbelle,

Shimon Ullman,

M. Jehanzeb Mirza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Doveh_2025_ICCV,
  author    = {Doveh, Sivan and Shabtay, Nimrod and Schwartz, Eli and Kuehne, Hilde and Giryes, Raja and Feris, Rogerio and Karlinsky, Leonid and Glass, James and Arbelle, Assaf and Ullman, Shimon and Mirza, M. Jehanzeb},
  title     = {Teaching {VLMs} to Localize Specific Objects from In-context Examples},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9572--9582},
}
A Hidden Stumbling Block in Generalized Category Discovery: Distracted Attention: Qiyu Xu,

Zhanxuan Hu,

Yu Duan,

Ercheng Pei,

Yonghang Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Qiyu and Hu, Zhanxuan and Duan, Yu and Pei, Ercheng and Tai, Yonghang},
  title     = {A Hidden Stumbling Block in Generalized Category Discovery: Distracted Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {405--414},
}
High-Precision 3D Measurement of Complex Textured Surfaces Using Multiple Filtering Approach: Yuchong Chen,

Jian Yu,

Shaoyan Gai,

Zeyu Cai,

Feipeng Da; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuchong and Yu, Jian and Gai, Shaoyan and Cai, Zeyu and Da, Feipeng},
  title     = {High-Precision {3D} Measurement of Complex Textured Surfaces Using Multiple Filtering Approach},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25670--25679},
}
Efficient Unsupervised Shortcut Learning Detection and Mitigation in Transformers: Lukas Kuhn,

Sari Sadiya,

Jörg Schlötterer,

Florian Buettner,

Christin Seifert,

Gemma Roig; [pdf] [supp]
[bibtex]
@InProceedings{Kuhn_2025_ICCV,
  author    = {Kuhn, Lukas and Sadiya, Sari and Schl{\"o}tterer, J{\"o}rg and Buettner, Florian and Seifert, Christin and Roig, Gemma},
  title     = {Efficient Unsupervised Shortcut Learning Detection and Mitigation in Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2217--2226},
}
Adaptive Caching for Faster Video Generation with Diffusion Transformers: Kumara Kahatapitiya,

Haozhe Liu,

Sen He,

Ding Liu,

Menglin Jia,

Chenyang Zhang,

Michael S. Ryoo,

Tian Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kahatapitiya_2025_ICCV,
  author    = {Kahatapitiya, Kumara and Liu, Haozhe and He, Sen and Liu, Ding and Jia, Menglin and Zhang, Chenyang and Ryoo, Michael S. and Xie, Tian},
  title     = {Adaptive Caching for Faster Video Generation with Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15240--15252},
}
RGE-GS: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors: Sicong Du,

Jiarun Liu,

Qifeng Chen,

Hao-Xiang Chen,

Tai-Jiang Mu,

Sheng Yang; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Sicong and Liu, Jiarun and Chen, Qifeng and Chen, Hao-Xiang and Mu, Tai-Jiang and Yang, Sheng},
  title     = {{RGE-GS}: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25756--25764},
}
EgoMusic-driven Human Dance Motion Estimation with Skeleton Mamba: Quang Nguyen,

Nhat Le,

Baoru Huang,

Minh Nhat Vu,

Chengcheng Tang,

Van Nguyen,

Ngan Le,

Thieu Vo,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Quang and Le, Nhat and Huang, Baoru and Vu, Minh Nhat and Tang, Chengcheng and Nguyen, Van and Le, Ngan and Vo, Thieu and Nguyen, Anh},
  title     = {{EgoMusic-driven} Human Dance Motion Estimation with {Skeleton Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12023--12033},
}
OmniCache: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models: Huanpeng Chu,

Wei Wu,

Guanyu Feng,

Yutao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Huanpeng and Wu, Wei and Feng, Guanyu and Zhang, Yutao},
  title     = {{OmniCache}: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16302--16312},
}
VIPerson: Flexibly Generating Virtual Identity for Person Re-Identification: Xiao-Wen Zhang,

Delong Zhang,

Yi-Xing Peng,

Zhi Ouyang,

Jingke Meng,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao-Wen and Zhang, Delong and Peng, Yi-Xing and Ouyang, Zhi and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{VIPerson}: Flexibly Generating Virtual Identity for Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23374--23384},
}
St4RTrack: Simultaneous 4D Reconstruction and Tracking in the World: Haiwen Feng,

Junyi Zhang,

Qianqian Wang,

Yufei Ye,

Pengcheng Yu,

Michael J. Black,

Trevor Darrell,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Haiwen and Zhang, Junyi and Wang, Qianqian and Ye, Yufei and Yu, Pengcheng and Black, Michael J. and Darrell, Trevor and Kanazawa, Angjoo},
  title     = {{St4RTrack}: Simultaneous {4D} Reconstruction and Tracking in the World},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8503--8513},
}
PRM: Photometric Stereo based Large Reconstruction Model: Wenhang Ge,

Jiantao Lin,

Guibao Shen,

Jiawei Feng,

Tao Hu,

Xinli Xu,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Wenhang and Lin, Jiantao and Shen, Guibao and Feng, Jiawei and Hu, Tao and Xu, Xinli and Chen, Ying-Cong},
  title     = {{PRM}: Photometric Stereo based Large Reconstruction Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25009--25018},
}
Exploiting Frequency Dynamics for Enhanced Multimodal Event-based Action Recognition: Meiqi Cao,

Xiangbo Shu,

Xin Jiang,

Rui Yan,

Yazhou Yao,

Jinhui Tang; [pdf]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Meiqi and Shu, Xiangbo and Jiang, Xin and Yan, Rui and Yao, Yazhou and Tang, Jinhui},
  title     = {Exploiting Frequency Dynamics for Enhanced Multimodal Event-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5969--5979},
}
AerialVG: A Challenging Benchmark for Aerial Visual Grounding by Exploring Positional Relations: Junli Liu,

Qizhi Chen,

Zhigang Wang,

Yiwen Tang,

Yiting Zhang,

Chi Yan,

Dong Wang,

Xuelong Li,

Bin Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Junli and Chen, Qizhi and Wang, Zhigang and Tang, Yiwen and Zhang, Yiting and Yan, Chi and Wang, Dong and Li, Xuelong and Zhao, Bin},
  title     = {{AerialVG}: A Challenging Benchmark for Aerial Visual Grounding by Exploring Positional Relations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5177--5187},
}
DriveArena: A Closed-loop Generative Simulation Platform for Autonomous Driving: Xuemeng Yang,

Licheng Wen,

Tiantian Wei,

Yukai Ma,

Jianbiao Mei,

Xin Li,

Wenjie Lei,

Daocheng Fu,

Pinlong Cai,

Min Dou,

Liang He,

Yong Liu,

Botian Shi,

Yu Qiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xuemeng and Wen, Licheng and Wei, Tiantian and Ma, Yukai and Mei, Jianbiao and Li, Xin and Lei, Wenjie and Fu, Daocheng and Cai, Pinlong and Dou, Min and He, Liang and Liu, Yong and Shi, Botian and Qiao, Yu},
  title     = {{DriveArena}: A Closed-loop Generative Simulation Platform for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26933--26943},
}
InvRGB+L: Inverse Rendering of Complex Scenes with Unified Color and LiDAR Reflectance Modeling: Xiaoxue Chen,

Bhargav Chandaka,

Chih-Hao Lin,

Ya-Qin Zhang,

David Forsyth,

Hao Zhao,

Shenlong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaoxue and Chandaka, Bhargav and Lin, Chih-Hao and Zhang, Ya-Qin and Forsyth, David and Zhao, Hao and Wang, Shenlong},
  title     = {{InvRGB+L}: Inverse Rendering of Complex Scenes with Unified Color and {LiDAR} Reflectance Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27176--27186},
}
DreamFuse: Adaptive Image Fusion with Diffusion Transformer: Junjia Huang,

Pengxiang Yan,

Jiyang Liu,

Jie Wu,

Zhao Wang,

Yitong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junjia and Yan, Pengxiang and Liu, Jiyang and Wu, Jie and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamFuse}: Adaptive Image Fusion with Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17292--17301},
}
DALIP: Distribution Alignment-based Language-Image Pre-Training for Domain-Specific Data: Junjie Wu,

Jiangtao Xie,

Zhaolin Zhang,

Qilong Wang,

Qinghua Hu,

Peihua Li,

Sen Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Junjie and Xie, Jiangtao and Zhang, Zhaolin and Wang, Qilong and Hu, Qinghua and Li, Peihua and Xu, Sen},
  title     = {{DALIP}: Distribution Alignment-based Language-Image Pre-Training for Domain-Specific Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2099--2109},
}
Joint Semantic and Rendering Enhancements in 3D Gaussian Modeling with Anisotropic Local Encoding: Jingming He,

Chongyi Li,

Shiqi Wang,

Sam Kwong; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jingming and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {Joint Semantic and Rendering Enhancements in {3D} {Gaussian} Modeling with Anisotropic Local Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28354--28363},
}
Unveiling the Invisible: Reasoning Complex Occlusions Amodally with AURA: Zhixuan Li,

Hyunse Yoon,

Sanghoon Lee,

Weisi Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Yoon, Hyunse and Lee, Sanghoon and Lin, Weisi},
  title     = {Unveiling the Invisible: Reasoning Complex Occlusions Amodally with {AURA}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21927--21937},
}
AGO: Adaptive Grounding for Open World 3D Occupancy Prediction: Peizheng Li,

Shuxiao Ding,

You Zhou,

Qingwen Zhang,

Onat Inak,

Larissa Triess,

Niklas Hanselmann,

Marius Cordts,

Andreas Zell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Peizheng and Ding, Shuxiao and Zhou, You and Zhang, Qingwen and Inak, Onat and Triess, Larissa and Hanselmann, Niklas and Cordts, Marius and Zell, Andreas},
  title     = {{AGO}: Adaptive Grounding for Open World {3D} Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8645--8655},
}
Free2Guide: Training-Free Text-to-Video Alignment using Image LVLM: Jaemin Kim,

Bryan Sangwoo Kim,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaemin and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{Free2Guide}: Training-Free Text-to-Video Alignment using Image {LVLM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17920--17929},
}
T2I-Copilot: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation: Chieh-Yun Chen,

Min Shi,

Gong Zhang,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chieh-Yun and Shi, Min and Zhang, Gong and Shi, Humphrey},
  title     = {{T2I-Copilot}: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19396--19405},
}
Lumina-Image 2.0: A Unified and Efficient Image Generative Framework: Qi Qin,

Le Zhuo,

Yi Xin,

Ruoyi Du,

Zhen Li,

Bin Fu,

Yiting Lu,

Xinyue Li,

Dongyang Liu,

Xiangyang Zhu,

Will Beddow,

Erwann Millon,

Victor Perez,

Wenhai Wang,

Yu Qiao,

Bo Zhang,

Xiaohong Liu,

Hongsheng Li,

Chang Xu,

Peng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Qi and Zhuo, Le and Xin, Yi and Du, Ruoyi and Li, Zhen and Fu, Bin and Lu, Yiting and Li, Xinyue and Liu, Dongyang and Zhu, Xiangyang and Beddow, Will and Millon, Erwann and Perez, Victor and Wang, Wenhai and Qiao, Yu and Zhang, Bo and Liu, Xiaohong and Li, Hongsheng and Xu, Chang and Gao, Peng},
  title     = {{Lumina-Image} 2.0: A Unified and Efficient Image Generative Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20031--20042}
}
mmCooper: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework: Bingyi Liu,

Jian Teng,

Hongfei Xue,

Enshu Wang,

Chuanhui Zhu,

Pu Wang,

Libing Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bingyi and Teng, Jian and Xue, Hongfei and Wang, Enshu and Zhu, Chuanhui and Wang, Pu and Wu, Libing},
  title     = {{mmCooper}: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28396--28406}
}
LightBSR: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning: Jiang Yuan,

Ji Ma,

Bo Wang,

Guanzhou Ke,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Jiang and Ma, Ji and Wang, Bo and Ke, Guanzhou and Hu, Weiming},
  title     = {{LightBSR}: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11927--11936}
}
Harnessing Uncertainty-aware Bounding Boxes for Unsupervised 3D Object Detection: Ruiyang Zhang,

Hu Zhang,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruiyang and Zhang, Hu and Zheng, Zhedong},
  title     = {Harnessing Uncertainty-aware Bounding Boxes for Unsupervised {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9230--9240}
}
OCSplats: Observation Completeness Quantification and Label Noise Separation in 3DGS: Han Ling,

Xian Xu,

Yinghui Sun,

Quansen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Han and Xu, Xian and Sun, Yinghui and Sun, Quansen},
  title     = {{OCSplats}: Observation Completeness Quantification and Label Noise Separation in {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25680--25689}
}
One Encoder to Rule them All: Representation Learning for Model-free Visual Reinforcement Learning using Fourier Neural Operators: Parag Dutta,

Mohd Ayyoob,

Shalabh Bhatnagar,

Ambedkar Dukkipati; [pdf]
[bibtex]
@InProceedings{Dutta_2025_ICCV,
  author    = {Dutta, Parag and Ayyoob, Mohd and Bhatnagar, Shalabh and Dukkipati, Ambedkar},
  title     = {One Encoder to Rule them All: Representation Learning for Model-free Visual Reinforcement Learning using {Fourier} Neural Operators},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4818--4827}
}
LOMM: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation: Seunghun Lee,

Jiwan Seo,

Minwoo Choi,

Kiljoon Han,

Jahoon Jeong,

Zane Durante,

Ehsan Adeli,

Sang Hyun Park,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunghun and Seo, Jiwan and Choi, Minwoo and Han, Kiljoon and Jeong, Jahoon and Durante, Zane and Adeli, Ehsan and Park, Sang Hyun and Im, Sunghoon},
  title     = {{LOMM}: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13719--13729}
}
Open-Vocabulary HOI Detection with Interaction-aware Prompt and Concept Calibration: Ting Lei,

Shaofeng Yin,

Qingchao Chen,

Yuxin Peng,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Ting and Yin, Shaofeng and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {Open-Vocabulary {HOI} Detection with Interaction-aware Prompt and Concept Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23945--23957}
}
DuCos: Duality Constrained Depth Super-Resolution via Foundation Model: Zhiqiang Yan,

Zhengxue Wang,

Haoye Dong,

Jun Li,

Jian Yang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zhiqiang and Wang, Zhengxue and Dong, Haoye and Li, Jun and Yang, Jian and Lee, Gim Hee},
  title     = {{DuCos}: Duality Constrained Depth Super-Resolution via Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8361--8371}
}
ROVI: A VLM-LLM Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation: Cihang Peng,

Qiming Hou,

Zhong Ren,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Cihang and Hou, Qiming and Ren, Zhong and Zhou, Kun},
  title     = {{ROVI}: A {VLM-LLM} Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20204--20214}
}
PBFG: A New Physically-Based Dataset and Removal of Lens Flares and Glares: Jie Zhu,

Sungkil Lee; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jie and Lee, Sungkil},
  title     = {{PBFG}: A New Physically-Based Dataset and Removal of Lens Flares and Glares},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5448--5457}
}
Active Perception Meets Rule-Guided RL: A Two-Phase Approach for Precise Object Navigation in Complex Environments: Liang Qin,

Min Wang,

Peiwei Li,

Wengang Zhou,

Houqiang Li; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Liang and Wang, Min and Li, Peiwei and Zhou, Wengang and Li, Houqiang},
  title     = {Active Perception Meets Rule-Guided {RL}: A Two-Phase Approach for Precise Object Navigation in Complex Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7603--7612}
}
CAT: A Unified Click-and-Track Framework for Realistic Tracking: Yongsheng Yuan,

Jie Zhao,

Dong Wang,

Huchuan Lu; [pdf]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Yongsheng and Zhao, Jie and Wang, Dong and Lu, Huchuan},
  title     = {{CAT}: A Unified Click-and-Track Framework for Realistic Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5690--5700}
}
Differential-informed Sample Selection Accelerates Multimodal Contrastive Learning: Zihua Zhao,

Feng Hong,

Mengxi Chen,

Pengyi Chen,

Benyuan Liu,

Jiangchao Yao,

Ya Zhang,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Zihua and Hong, Feng and Chen, Mengxi and Chen, Pengyi and Liu, Benyuan and Yao, Jiangchao and Zhang, Ya and Wang, Yanfeng},
  title     = {Differential-informed Sample Selection Accelerates Multimodal Contrastive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2930--2940}
}
A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields: Aoxiang Fan,

Corentin Dumery,

Nicolas Talabot,

Pascal Fua; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Aoxiang and Dumery, Corentin and Talabot, Nicolas and Fua, Pascal},
  title     = {A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25961--25971}
}
LUT-Fuse: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables: Xunpeng Yi,

Yibing Zhang,

Xinyu Xiang,

Qinglong Yan,

Han Xu,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Xunpeng and Zhang, Yibing and Xiang, Xinyu and Yan, Qinglong and Xu, Han and Ma, Jiayi},
  title     = {{LUT-Fuse}: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14559--14568}
}
AdsQA: Towards Advertisement Video Understanding: Xinwei Long,

Kai Tian,

Peng Xu,

Guoli Jia,

Jingxuan Li,

Sa Yang,

Yihua Shao,

Kaiyan Zhang,

Che Jiang,

Hao Xu,

Yang Liu,

Jiaheng Ma,

Bowen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_ICCV,
  author    = {Long, Xinwei and Tian, Kai and Xu, Peng and Jia, Guoli and Li, Jingxuan and Yang, Sa and Shao, Yihua and Zhang, Kaiyan and Jiang, Che and Xu, Hao and Liu, Yang and Ma, Jiaheng and Zhou, Bowen},
  title     = {{AdsQA}: Towards Advertisement Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23396--23407}
}
Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection: Jiawen Zhu,

Yew-Soon Ong,

Chunhua Shen,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiawen and Ong, Yew-Soon and Shen, Chunhua and Pang, Guansong},
  title     = {Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22241--22251}
}
Learning Dense Feature Matching via Lifting Single 2D Image to 3D Space: Yingping Liang,

Yutao Hu,

Wenqi Shao,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying},
  title     = {Learning Dense Feature Matching via Lifting Single {2D} Image to {3D} Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6621--6631}
}
DNF-Intrinsic: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering: Rongjia Zheng,

Qing Zhang,

Chengjiang Long,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Rongjia and Zhang, Qing and Long, Chengjiang and Zheng, Wei-Shi},
  title     = {{DNF-Intrinsic}: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10342--10352}
}
Unified Open-World Segmentation with Multi-Modal Prompts: Yang Liu,

Yufei Yin,

Chenchen Jing,

Muzhi Zhu,

Hao Chen,

Yuling Xi,

Bo Feng,

Hao Wang,

Shiyu Li,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Yin, Yufei and Jing, Chenchen and Zhu, Muzhi and Chen, Hao and Xi, Yuling and Feng, Bo and Wang, Hao and Li, Shiyu and Shen, Chunhua},
  title     = {Unified Open-World Segmentation with Multi-Modal Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21557--21567}
}
GausSim: Foreseeing Reality by Gaussian Simulator for Elastic Objects: Yidi Shao,

Mu Huang,

Chen Change Loy,

Bo Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Yidi and Huang, Mu and Loy, Chen Change and Dai, Bo},
  title     = {{GausSim}: Foreseeing Reality by {Gaussian} Simulator for Elastic Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7841--7850}
}
ClaraVid: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling: Radu Beche,

Sergiu Nedevschi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beche_2025_ICCV,
  author    = {Beche, Radu and Nedevschi, Sergiu},
  title     = {{ClaraVid}: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26015--26025}
}
TextMaster: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control: Zhenyu Yan,

Jian Wang,

Aoqiang Wang,

Yuhan Li,

Wenxiang Shang,

Zhu Hangcheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zhenyu and Wang, Jian and Wang, Aoqiang and Li, Yuhan and Shang, Wenxiang and Hangcheng, Zhu},
  title     = {{TextMaster}: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16112--16121}
}
X2-Gaussian: 4D Radiative Gaussian Splatting for Continuous-time Tomographic Reconstruction: Weihao Yu,

Yuanhao Cai,

Ruyi Zha,

Zhiwen Fan,

Chenxin Li,

Yixuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Weihao and Cai, Yuanhao and Zha, Ruyi and Fan, Zhiwen and Li, Chenxin and Yuan, Yixuan},
  title     = {{X2-Gaussian}: {4D} Radiative {Gaussian} Splatting for Continuous-time Tomographic Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24728--24738}
}
FrameFusion: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models: Tianyu Fu,

Tengxuan Liu,

Qinghao Han,

Guohao Dai,

Shengen Yan,

Huazhong Yang,

Xuefei Ning,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tianyu and Liu, Tengxuan and Han, Qinghao and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Ning, Xuefei and Wang, Yu},
  title     = {{FrameFusion}: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22654--22663}
}
AURELIA: Test-time Reasoning Distillation in Audio-Visual LLMs: Sanjoy Chowdhury,

Hanan Gani,

Nishit Anand,

Sayan Nag,

Ruohan Gao,

Mohamed Elhoseiny,

Salman Khan,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Gani, Hanan and Anand, Nishit and Nag, Sayan and Gao, Ruohan and Elhoseiny, Mohamed and Khan, Salman and Manocha, Dinesh},
  title     = {{AURELIA}: Test-time Reasoning Distillation in Audio-Visual {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22899--22910}
}
Unraveling the Smoothness Properties of Diffusion Models: A Gaussian Mixture Perspective: Yingyu Liang,

Zhizhou Sha,

Zhenmei Shi,

Zhao Song,

Mingda Wan,

Yufa Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yingyu and Sha, Zhizhou and Shi, Zhenmei and Song, Zhao and Wan, Mingda and Zhou, Yufa},
  title     = {Unraveling the Smoothness Properties of Diffusion Models: A {Gaussian} Mixture Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11436--11446}
}
FB-Diff: Fourier Basis-guided Diffusion for Temporal Interpolation of 4D Medical Imaging: Xin You,

Runze Yang,

Chuyan Zhang,

Zhongliang Jiang,

Jie Yang,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Xin and Yang, Runze and Zhang, Chuyan and Jiang, Zhongliang and Yang, Jie and Navab, Nassir},
  title     = {{FB-Diff}: {Fourier} Basis-guided Diffusion for Temporal Interpolation of {4D} Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28010--28020}
}
UNIS: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering: Junkai Deng,

Hanting Niu,

Jiaze Li,

Fei Hou,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Junkai and Niu, Hanting and Li, Jiaze and Hou, Fei and He, Ying},
  title     = {{UNIS}: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27671--27680}
}
Scendi Score: Prompt-Aware Diversity Evaluation via Schur Complement of CLIP Embeddings: Azim Ospanov,

Mohammad Jalali,

Farzan Farnia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ospanov_2025_ICCV,
  author    = {Ospanov, Azim and Jalali, Mohammad and Farnia, Farzan},
  title     = {{Scendi} Score: Prompt-Aware Diversity Evaluation via {Schur} Complement of {CLIP} Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16927--16937}
}
Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation: Peng Ren,

Tian Bai,

Jing Sun,

Fuming Sun; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Peng and Bai, Tian and Sun, Jing and Sun, Fuming},
  title     = {Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23657--23666}
}
HOLa: Zero-Shot HOI Detection with Low-Rank Decomposed VLM Feature Adaptation: Qinqian Lei,

Bo Wang,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Qinqian and Wang, Bo and Tan, Robby T.},
  title     = {{HOLa}: Zero-Shot {HOI} Detection with Low-Rank Decomposed {VLM} Feature Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1825--1835}
}
Context-Aware Academic Emotion Dataset and Benchmark: Luming Zhao,

Jingwen Xuan,

Jiamin Lou,

Yonghui Yu,

Wenwu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Luming and Xuan, Jingwen and Lou, Jiamin and Yu, Yonghui and Yang, Wenwu},
  title     = {Context-Aware Academic Emotion Dataset and Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13859--13868}
}
Contrastive Test-Time Composition of Multiple LoRA Models for Image Generation: Tuna Han Salih Meral,

Enis Simsar,

Federico Tombari,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meral_2025_ICCV,
  author    = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar},
  title     = {Contrastive Test-Time Composition of Multiple {LoRA} Models for Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18090--18100}
}
DiffRefine: Diffusion-based Proposal Specific Point Cloud Densification for Cross-Domain Object Detection: Sangyun Shin,

Yuhang He,

Xinyu Hou,

Samuel Hodgson,

Andrew Markham,

Niki Trigoni; [pdf]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Sangyun and He, Yuhang and Hou, Xinyu and Hodgson, Samuel and Markham, Andrew and Trigoni, Niki},
  title     = {{DiffRefine}: Diffusion-based Proposal Specific Point Cloud Densification for Cross-Domain Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4888--4897}
}
Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection: Juan Hu,

Shaojing Fan,

Terence Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Juan and Fan, Shaojing and Sim, Terence},
  title     = {Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14517--14527}
}
Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates: Kecheng Chen,

Xinyu Luo,

Tiexin Qin,

Jie Liu,

Hui Liu,

Victor Ho Fun Lee,

Hong Yan,

Haoliang Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kecheng and Luo, Xinyu and Qin, Tiexin and Liu, Jie and Liu, Hui and Lee, Victor Ho Fun and Yan, Hong and Li, Haoliang},
  title     = {Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20075--20084}
}
Hierarchy UGP: Hierarchy Unified Gaussian Primitive for Large-Scale Dynamic Scene Reconstruction: Hongyang Sun,

Qinglin Yang,

Jiawei Wang,

Zhen Xu,

Chen Liu,

Yida Wang,

Kun Zhan,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Hongyang and Yang, Qinglin and Wang, Jiawei and Xu, Zhen and Liu, Chen and Wang, Yida and Zhan, Kun and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {Hierarchy {UGP}: Hierarchy Unified {Gaussian} Primitive for Large-Scale Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26252--26262}
}
SMSTracker: Tri-path Score Mask Sigma Fusion for Multi-Modal Tracking: Sixian Chan,

Zedong Li,

Wenhao Li,

Shijian Lu,

Chunhua Shen,

Xiaoqin Zhang; [pdf]
[bibtex]
@InProceedings{Chan_2025_ICCV,
  author    = {Chan, Sixian and Li, Zedong and Li, Wenhao and Lu, Shijian and Shen, Chunhua and Zhang, Xiaoqin},
  title     = {{SMSTracker}: Tri-path Score Mask Sigma Fusion for Multi-Modal Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4766--4775}
}
Distilling Parallel Gradients for Fast ODE Solvers of Diffusion Models: Beier Zhu,

Ruoyu Wang,

Tong Zhao,

Hanwang Zhang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Beier and Wang, Ruoyu and Zhao, Tong and Zhang, Hanwang and Zhang, Chi},
  title     = {Distilling Parallel Gradients for Fast {ODE} Solvers of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19557--19566}
}
Aligning Constraint Generation with Design Intent in Parametric CAD: Evan Casey,

Tianyu Zhang,

Shu Ishida,

John Roger Thompson,

Amir Khasahmadi,

Joseph George Lambourne,

Pradeep Kumar Jayaraman,

Karl D.D. Willis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Casey_2025_ICCV,
  author    = {Casey, Evan and Zhang, Tianyu and Ishida, Shu and Thompson, John Roger and Khasahmadi, Amir and Lambourne, Joseph George and Jayaraman, Pradeep Kumar and Willis, Karl D. D.},
  title     = {Aligning Constraint Generation with Design Intent in Parametric {CAD}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8613--8622}
}
DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs: Jiahe Zhao,

Rongkun Zheng,

Yi Wang,

Helin Wang,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Jiahe and Zheng, Rongkun and Wang, Yi and Wang, Helin and Zhao, Hengshuang},
  title     = {{DisCo}: Towards Distinct and Coherent Visual Encapsulation in Video {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21710--21720}
}
PUMA: Empowering Unified MLLM with Multi-granular Visual Generation: Rongyao Fang,

Chengqi Duan,

Kun Wang,

Hao Li,

Linjiang Huang,

Hao Tian,

Xingyu Zeng,

Rui Zhao,

Jifeng Dai,

Hongsheng Li,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Rongyao and Duan, Chengqi and Wang, Kun and Li, Hao and Huang, Linjiang and Tian, Hao and Zeng, Xingyu and Zhao, Rui and Dai, Jifeng and Li, Hongsheng and Liu, Xihui},
  title     = {{PUMA}: Empowering Unified {MLLM} with Multi-granular Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15447--15457}
}
Axis-level Symmetry Detection with Group-Equivariant Representation: Wongyun Yu,

Ahyun Seo,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wongyun and Seo, Ahyun and Cho, Minsu},
  title     = {Axis-level Symmetry Detection with Group-Equivariant Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24791--24800}
}
SMP-Attack: Boosting the Transferability of Feature Importance-based Adversarial Attack with Semantics-aware Multi-granularity Patchout: Wen Yang,

Guodong Liu,

Di Ming; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wen and Liu, Guodong and Ming, Di},
  title     = {{SMP-Attack}: Boosting the Transferability of Feature Importance-based Adversarial Attack with Semantics-aware Multi-granularity Patchout},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4444--4454}
}
Referring to Any Person: Qing Jiang,

Lin Wu,

Zhaoyang Zeng,

Tianhe Ren,

Yuda Xiong,

Yihao Chen,

Liu Qin,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Qing and Wu, Lin and Zeng, Zhaoyang and Ren, Tianhe and Xiong, Yuda and Chen, Yihao and Qin, Liu and Zhang, Lei},
  title     = {Referring to Any Person},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21667--21678}
}
GWM: Towards Scalable Gaussian World Models for Robotic Manipulation: Guanxing Lu,

Baoxiong Jia,

Puhao Li,

Yixin Chen,

Ziwei Wang,

Yansong Tang,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guanxing and Jia, Baoxiong and Li, Puhao and Chen, Yixin and Wang, Ziwei and Tang, Yansong and Huang, Siyuan},
  title     = {{GWM}: Towards Scalable {Gaussian} World Models for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9263--9274}
}
Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images: Qi Xun Yeo,

Yanyan Li,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2025_ICCV,
  author    = {Yeo, Qi Xun and Li, Yanyan and Lee, Gim Hee},
  title     = {Statistical Confidence Rescoring for Robust {3D} Scene Graph Generation from Multi-View Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24999--25008}
}
Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models: Hyungjin Kim,

Seokho Ahn,

Young-Duk Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk},
  title     = {Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17171--17180}
}
CopyrightShield: Enhancing Diffusion Model Security Against Copyright Infringement Attacks: Zhixiang Guo,

Siyuan Liang,

Aishan Liu,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Zhixiang and Liang, Siyuan and Liu, Aishan and Tao, Dacheng},
  title     = {{CopyrightShield}: Enhancing Diffusion Model Security Against Copyright Infringement Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19417--19426}
}
PixTalk: Controlling Photorealistic Image Processing and Editing with Language: Marcos V. Conde,

Zihao Lu,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Conde_2025_ICCV,
  author    = {Conde, Marcos V. and Lu, Zihao and Timofte, Radu},
  title     = {{PixTalk}: Controlling Photorealistic Image Processing and Editing with Language},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19269--19279}
}
Learning Streaming Video Representation via Multitask Training: Yibin Yan,

Jilan Xu,

Shangzhe Di,

Yikun Liu,

Yudi Shi,

Qirui Chen,

Zeqian Li,

Yifei Huang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Yibin and Xu, Jilan and Di, Shangzhe and Liu, Yikun and Shi, Yudi and Chen, Qirui and Li, Zeqian and Huang, Yifei and Xie, Weidi},
  title     = {Learning Streaming Video Representation via Multitask Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9900--9912}
}
DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering: Jie Chen,

Zhangchi Hu,

Peixi Wu,

Huyue Zhu,

Hebei Li,

Xiaoyan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jie and Hu, Zhangchi and Wu, Peixi and Zhu, Huyue and Li, Hebei and Sun, Xiaoyan},
  title     = {{DASH}: {4D} Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26349--26359}
}
Pose-Star: Anatomy-Aware Editing for Open-World Fashion Images: Yuran Dong,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Yuran and Ye, Mang},
  title     = {{Pose-Star}: Anatomy-Aware Editing for Open-World Fashion Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15822--15831}
}
MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model: Yaoye Zhu,

Zhe Wang,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yaoye and Wang, Zhe and Wang, Yan},
  title     = {{MamV2XCalib}: {V2X}-based Target-less Infrastructure Camera Calibration with State Space Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26696--26705}
}
Beyond the Destination: A Novel Benchmark for Exploration-Aware Embodied Question Answering: Kaixuan Jiang,

Yang Liu,

Weixing Chen,

Jingzhou Luo,

Ziliang Chen,

Ling Pan,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Kaixuan and Liu, Yang and Chen, Weixing and Luo, Jingzhou and Chen, Ziliang and Pan, Ling and Li, Guanbin and Lin, Liang},
  title     = {Beyond the Destination: A Novel Benchmark for Exploration-Aware Embodied Question Answering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9091--9101},
}
Semantic Equitable Clustering: A Simple and Effective Strategy for Clustering Vision Tokens: Qihang Fan,

Huaibo Huang,

Mingrui Chen,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Qihang and Huang, Huaibo and Chen, Mingrui and He, Ran},
  title     = {Semantic Equitable Clustering: A Simple and Effective Strategy for Clustering Vision Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4019--4028},
}
HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder: Yingqi Tang,

Zhuoran Xu,

Zhaotie Meng,

Erkang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Yingqi and Xu, Zhuoran and Meng, Zhaotie and Cheng, Erkang},
  title     = {{HiP-AD}: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25605--25615},
}
Visual Textualization for Image Prompted Object Detection: Yongjian Wu,

Yang Zhou,

Jiya Saiyin,

Bingzheng Wei,

Yan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yongjian and Zhou, Yang and Saiyin, Jiya and Wei, Bingzheng and Xu, Yan},
  title     = {Visual Textualization for Image Prompted Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20900--20910},
}
Sliced Wasserstein Bridge for Open-Vocabulary Video Instance Segmentation: Zheyun Qin,

Deng Yu,

Chuanchen Luo,

Zhumin Chen; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Zheyun and Yu, Deng and Luo, Chuanchen and Chen, Zhumin},
  title     = {Sliced {Wasserstein} Bridge for Open-Vocabulary Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12470--12478},
}
Moderating the Generalization of Score-based Generative Model: Wan Jiang,

He Wang,

Xin Zhang,

Dan Guo,

Zhaoxin Fan,

Yunfeng Diao,

Richang Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Wan and Wang, He and Zhang, Xin and Guo, Dan and Fan, Zhaoxin and Diao, Yunfeng and Hong, Richang},
  title     = {Moderating the Generalization of Score-based Generative Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {360--369},
}
A Token-level Text Image Foundation Model for Document Understanding: Tongkun Guan,

Zining Wang,

Pei Fu,

Zhengtao Guo,

Wei Shen,

Kai Zhou,

Tiezhu Yue,

Chen Duan,

Hao Sun,

Qianyi Jiang,

Junfeng Luo,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Tongkun and Wang, Zining and Fu, Pei and Guo, Zhengtao and Shen, Wei and Zhou, Kai and Yue, Tiezhu and Duan, Chen and Sun, Hao and Jiang, Qianyi and Luo, Junfeng and Yang, Xiaokang},
  title     = {A Token-level Text Image Foundation Model for Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23210--23220},
}
Riemannian-Geometric Fingerprints of Generative Models: Hae Jin Song,

Laurent Itti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Hae Jin and Itti, Laurent},
  title     = {{Riemannian}-Geometric Fingerprints of Generative Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11425--11435},
}
NeRF Is a Valuable Assistant for 3D Gaussian Splatting: Shuangkang Fang,

I-Chao Shen,

Takeo Igarashi,

Yufeng Wang,

ZeSheng Wang,

Yi Yang,

Wenrui Ding,

Shuchang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shuangkang and Shen, I-Chao and Igarashi, Takeo and Wang, Yufeng and Wang, ZeSheng and Yang, Yi and Ding, Wenrui and Zhou, Shuchang},
  title     = {{NeRF} Is a Valuable Assistant for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26230--26240},
}
Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping: Alberto Jaenal,

Paula Carbó Cubero,

José Araújo,

André Mateus; [pdf] [supp]
[bibtex]
@InProceedings{Jaenal_2025_ICCV,
  author    = {Jaenal, Alberto and Cubero, Paula Carb{\'o} and Ara{\'u}jo, Jos{\'e} and Mateus, Andr{\'e}},
  title     = {Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26783--26792},
}
EasyControl: Adding Efficient and Flexible Control for Diffusion Transformer: Yuxuan Zhang,

Yirui Yuan,

Yiren Song,

Haofan Wang,

Jiaming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuxuan and Yuan, Yirui and Song, Yiren and Wang, Haofan and Liu, Jiaming},
  title     = {{EasyControl}: Adding Efficient and Flexible Control for Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19513--19524},
}
PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model: Jinhua Zhang,

Hualian Sheng,

Sijia Cai,

Bing Deng,

Qiao Liang,

Wen Li,

Ying Fu,

Jieping Ye,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jinhua and Sheng, Hualian and Cai, Sijia and Deng, Bing and Liang, Qiao and Li, Wen and Fu, Ying and Ye, Jieping and Gu, Shuhang},
  title     = {{PerLDiff}: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26306--26315},
}
DocThinker: Explainable Multimodal Large Language Models with Rule-based Reinforcement Learning for Document Understanding: Wenwen Yu,

Zhibo Yang,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wenwen and Yang, Zhibo and Liu, Yuliang and Bai, Xiang},
  title     = {{DocThinker}: Explainable Multimodal Large Language Models with Rule-based Reinforcement Learning for Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {837--847},
}
I2VControl: Disentangled and Unified Video Motion Synthesis Control: Wanquan Feng,

Tianhao Qi,

Jiawei Liu,

Mingzhen Sun,

Pengqi Tu,

Tianxiang Ma,

Fei Dai,

Songtao Zhao,

Siyu Zhou,

Qian He; [pdf] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Wanquan and Qi, Tianhao and Liu, Jiawei and Sun, Mingzhen and Tu, Pengqi and Ma, Tianxiang and Dai, Fei and Zhao, Songtao and Zhou, Siyu and He, Qian},
  title     = {{I2VControl}: Disentangled and Unified Video Motion Synthesis Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14051--14060},
}
Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens: Dongwon Kim,

Ju He,

Qihang Yu,

Chenglin Yang,

Xiaohui Shen,

Suha Kwak,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongwon and He, Ju and Yu, Qihang and Yang, Chenglin and Shen, Xiaohui and Kwak, Suha and Chen, Liang-Chieh},
  title     = {Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18442--18452},
}
Aligning Global Semantics and Local Textures in Generative Video Enhancement: Zhikai Chen,

Fuchen Long,

Zhaofan Qiu,

Ting Yao,

Wengang Zhou,

Jiebo Luo,

Tao Mei; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao},
  title     = {Aligning Global Semantics and Local Textures in Generative Video Enhancement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17087--17096},
}
VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference: Meiqi Wang,

Han Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Meiqi and Qiu, Han},
  title     = {{VISO}: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23300--23310},
}
Reverse Convolution and Its Applications to Image Restoration: Xuhong Huang,

Shiqi Liu,

Kai Zhang,

Ying Tai,

Jian Yang,

Hui Zeng,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Xuhong and Liu, Shiqi and Zhang, Kai and Tai, Ying and Yang, Jian and Zeng, Hui and Zhang, Lei},
  title     = {Reverse Convolution and Its Applications to Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10507--10516},
}
ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking: Xiaokun Feng,

Shiyu Hu,

Xuchen Li,

Dailing Zhang,

Meiqi Wu,

Jing Zhang,

Xiaotang Chen,

Kaiqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Xiaokun and Hu, Shiyu and Li, Xuchen and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Chen, Xiaotang and Huang, Kaiqi},
  title     = {{ATCTrack}: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19850--19861},
}
Reminiscence Attack on Residuals: Exploiting Approximate Machine Unlearning for Privacy: Yaxin Xiao,

Qingqing Ye,

Li Hu,

Huadi Zheng,

Haibo Hu,

Zi Liang,

Haoyang Li,

Yijie Jiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yaxin and Ye, Qingqing and Hu, Li and Zheng, Huadi and Hu, Haibo and Liang, Zi and Li, Haoyang and Jiao, Yijie},
  title     = {Reminiscence Attack on Residuals: Exploiting Approximate Machine Unlearning for Privacy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3058--3068},
}
SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning: Zhewei Dai,

Shilei Zeng,

Haotian Liu,

Xurui Li,

Feng Xue,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Zhewei and Zeng, Shilei and Liu, Haotian and Li, Xurui and Xue, Feng and Zhou, Yu},
  title     = {{SeaS}: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23135--23144},
}
CreatiLayout: Siamese Multimodal Diffusion Transformer for Creative Layout-to-Image Generation: Hui Zhang,

Dexiang Hong,

Yitong Wang,

Jie Shao,

Xinglong Wu,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hui and Hong, Dexiang and Wang, Yitong and Shao, Jie and Wu, Xinglong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{CreatiLayout}: {Siamese} Multimodal Diffusion Transformer for Creative Layout-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18487--18497},
}
AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction: Bin Rao,

Haicheng Liao,

Yanchen Guan,

Chengyue Wang,

Bonan Wang,

Jiaxun Zhang,

Zhenning Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2025_ICCV,
  author    = {Rao, Bin and Liao, Haicheng and Guan, Yanchen and Wang, Chengyue and Wang, Bonan and Zhang, Jiaxun and Li, Zhenning},
  title     = {{AMD}: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28849--28858},
}
Music Grounding by Short Video: Zijie Xin,

Minquan Wang,

Jingyu Liu,

Quan Chen,

Ye Ma,

Peng Jiang,

Xirong Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Xin_2025_ICCV,
  author    = {Xin, Zijie and Wang, Minquan and Liu, Jingyu and Chen, Quan and Ma, Ye and Jiang, Peng and Li, Xirong},
  title     = {Music Grounding by Short Video},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22285--22293},
}
DyWA: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation: Jiangran Lyu,

Ziming Li,

Xuesong Shi,

Chaoyi Xu,

Yizhou Wang,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Jiangran and Li, Ziming and Shi, Xuesong and Xu, Chaoyi and Wang, Yizhou and Wang, He},
  title     = {{DyWA}: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11058--11068},
}
POMATO: Marrying Pointmap Matching with Temporal Motions for Dynamic 3D Reconstruction: Songyan Zhang,

Yongtao Ge,

Jinyuan Tian,

Guangkai Xu,

Hao Chen,

Chen Lv,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Songyan and Ge, Yongtao and Tian, Jinyuan and Xu, Guangkai and Chen, Hao and Lv, Chen and Shen, Chunhua},
  title     = {{POMATO}: Marrying Pointmap Matching with Temporal Motions for Dynamic {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5680--5689},
}
Flow Stochastic Segmentation Networks: Fabio De Sousa Ribeiro,

Omar Todd,

Charles Jones,

Avinash Kori,

Raghav Mehta,

Ben Glocker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{De_Sousa_Ribeiro_2025_ICCV,
  author    = {De Sousa Ribeiro, Fabio and Todd, Omar and Jones, Charles and Kori, Avinash and Mehta, Raghav and Glocker, Ben},
  title     = {Flow Stochastic Segmentation Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14754--14765},
}
Learning Large Motion Estimation from Intermediate Representations with a High-Resolution Optical Flow Dataset Featuring Long-Range Dynamic Motion: Hoonhee Cho,

Yuhwan Jeong,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin},
  title     = {Learning Large Motion Estimation from Intermediate Representations with a High-Resolution Optical Flow Dataset Featuring Long-Range Dynamic Motion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6176--6187},
}
Towards Higher Effective Rank in Parameter-Efficient Fine-tuning using Khatri-Rao Product: Paul Albert,

Frederic Z. Zhang,

Hemanth Saratchandran,

Anton van den Hengel,

Ehsan Abbasnejad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Albert_2025_ICCV,
  author    = {Albert, Paul and Zhang, Frederic Z. and Saratchandran, Hemanth and van den Hengel, Anton and Abbasnejad, Ehsan},
  title     = {Towards Higher Effective Rank in Parameter-Efficient Fine-tuning using {Khatri-Rao} Product},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1292--1302},
}
USP: Unified Self-Supervised Pretraining for Image Generation and Understanding: Xiangxiang Chu,

Renda Li,

Yong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Xiangxiang and Li, Renda and Wang, Yong},
  title     = {{USP}: Unified Self-Supervised Pretraining for Image Generation and Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18475--18486},
}
Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data: Ke Fan,

Shunlin Lu,

Minyue Dai,

Runyi Yu,

Lixing Xiao,

Zhiyang Dou,

Junting Dong,

Lizhuang Ma,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Ke and Lu, Shunlin and Dai, Minyue and Yu, Runyi and Xiao, Lixing and Dou, Zhiyang and Dong, Junting and Ma, Lizhuang and Wang, Jingbo},
  title     = {Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13336--13348},
}
Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on fMRI: Haodong Jing,

Dongyao Jiang,

Yongqiang Ma,

Haibo Hua,

Bo Huang,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Jing_2025_ICCV,
  author    = {Jing, Haodong and Jiang, Dongyao and Ma, Yongqiang and Hua, Haibo and Huang, Bo and Zheng, Nanning},
  title     = {Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on {fMRI}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19258--19268},
}
DAViD: Modeling Dynamic Affordance of 3D Objects Using Pre-trained Video Diffusion Models: Hyeonwoo Kim,

Sangwon Baik,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyeonwoo and Baik, Sangwon and Joo, Hanbyul},
  title     = {{DAViD}: Modeling Dynamic Affordance of {3D} Objects Using Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10330--10341},
}
SKALD: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation: Chen-Yi Lu,

Md Mehrab Tanjim,

Ishita Dasgupta,

Somdeb Sarkhel,

Gang Wu,

Saayan Mitra,

Somali Chaterji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chen-Yi and Tanjim, Md Mehrab and Dasgupta, Ishita and Sarkhel, Somdeb and Wu, Gang and Mitra, Saayan and Chaterji, Somali},
  title     = {{SKALD}: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17859--17868},
}
When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation: Pan Liu,

Jinshi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Pan and Liu, Jinshi},
  title     = {When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21874--21884},
}
Preacher: Paper-to-Video Agentic System: Jingwei Liu,

Ling Yang,

Hao Luo,

Fan Wang,

Hongyan Li,

Mengdi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jingwei and Yang, Ling and Luo, Hao and Wang, Fan and Li, Hongyan and Wang, Mengdi},
  title     = {Preacher: Paper-to-Video Agentic System},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17129--17139},
}
More Reliable Pseudo-labels, Better Performance: A Generalized Approach to Single Positive Multi-label Learning: Luong Tran,

Thieu Vo,

Anh Nguyen,

Sang Dinh,

Van Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Luong and Vo, Thieu and Nguyen, Anh and Dinh, Sang and Nguyen, Van},
  title     = {More Reliable Pseudo-labels, Better Performance: A Generalized Approach to Single Positive Multi-label Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1349--1358},
}
Detection, Pose Estimation and Segmentation for Multiple Bodies: Closing the Virtuous Circle: Miroslav Purkrabek,

Jiri Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Purkrabek_2025_ICCV,
  author    = {Purkrabek, Miroslav and Matas, Jiri},
  title     = {Detection, Pose Estimation and Segmentation for Multiple Bodies: Closing the Virtuous Circle},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9004--9013},
}
CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model: Yuxuan Luo,

Jiaqi Tang,

Chenyi Huang,

Feiyang Hao,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yuxuan and Tang, Jiaqi and Huang, Chenyi and Hao, Feiyang and Lian, Zhouhui},
  title     = {{CalliReader}: Contextualizing {Chinese} Calligraphy via an Embedding-Aligned Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23030--23040},
}
NeuralSVG: An Implicit Representation for Text-to-Vector Generation: Sagi Polaczek,

Yuval Alaluf,

Elad Richardson,

Yael Vinker,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Polaczek_2025_ICCV,
  author    = {Polaczek, Sagi and Alaluf, Yuval and Richardson, Elad and Vinker, Yael and Cohen-Or, Daniel},
  title     = {{NeuralSVG}: An Implicit Representation for Text-to-Vector Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15458--15468},
}
Hierarchical Cross-modal Prompt Learning for Vision-Language Models: Hao Zheng,

Shunzhi Yang,

Zhuoxin He,

Jinfeng Yang,

Zhenhua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Hao and Yang, Shunzhi and He, Zhuoxin and Yang, Jinfeng and Huang, Zhenhua},
  title     = {Hierarchical Cross-modal Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1891--1901},
}
RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction: Johannes Künzel,

Anna Hilsmann,

Peter Eisert; [pdf] [supp]
[bibtex]
@InProceedings{Kunzel_2025_ICCV,
  author    = {K{\"u}nzel, Johannes and Hilsmann, Anna and Eisert, Peter},
  title     = {{RIPE}: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4868--4877},
}
Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions: Ziyi Dong,

Chengxing Zhou,

Weijian Deng,

Pengxu Wei,

Xiangyang Ji,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Ziyi and Zhou, Chengxing and Deng, Weijian and Wei, Pengxu and Ji, Xiangyang and Lin, Liang},
  title     = {Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17401--17410},
}
Activation Subspaces for Out-of-Distribution Detection: Barış Zöngür,

Robin Hesse,

Stefan Roth; [pdf] [supp]
[bibtex]
@InProceedings{Zongur_2025_ICCV,
  author    = {Z{\"o}ng{\"u}r, Bar{\i}{\c{s}} and Hesse, Robin and Roth, Stefan},
  title     = {Activation Subspaces for Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3509--3519},
}
SIMS: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation: Wenjia Wang,

Liang Pan,

Zhiyang Dou,

Jidong Mei,

Zhouyingcheng Liao,

Yuke Lou,

Yifan Wu,

Lei Yang,

Jingbo Wang,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenjia and Pan, Liang and Dou, Zhiyang and Mei, Jidong and Liao, Zhouyingcheng and Lou, Yuke and Wu, Yifan and Yang, Lei and Wang, Jingbo and Komura, Taku},
  title     = {{SIMS}: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14117--14127},
}
LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing: Federico Girella,

Davide Talon,

Ziyue Liu,

Zanxi Ruan,

Yiming Wang,

Marco Cristani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Girella_2025_ICCV,
  author    = {Girella, Federico and Talon, Davide and Liu, Ziyue and Ruan, Zanxi and Wang, Yiming and Cristani, Marco},
  title     = {{LOTS} of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19711--19720},
}
DuoCLR: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation: Haitao Tian; [pdf] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Haitao},
  title     = {{DuoCLR}: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13772--13782},
}
Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation: Zhixiang Chi,

Yanan Wu,

Li Gu,

Huan Liu,

Ziqiang Wang,

Yang Zhang,

Yang Wang,

Konstantinos Plataniotis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chi_2025_ICCV,
  author    = {Chi, Zhixiang and Wu, Yanan and Gu, Li and Liu, Huan and Wang, Ziqiang and Zhang, Yang and Wang, Yang and Plataniotis, Konstantinos},
  title     = {Plug-in Feedback Self-adaptive Attention in {CLIP} for Training-free Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22815--22825},
}
Street Gaussians without 3D Object Tracker: Ruida Zhang,

Chengxi Li,

Chenyangguang Zhang,

Xingyu Liu,

Haili Yuan,

Yanyan Li,

Xiangyang Ji,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruida and Li, Chengxi and Zhang, Chenyangguang and Liu, Xingyu and Yuan, Haili and Li, Yanyan and Ji, Xiangyang and Lee, Gim Hee},
  title     = {Street {Gaussians} without {3D} Object Tracker},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25722--25734},
}
AllGCD: Leveraging All Unlabeled Data for Generalized Category Discovery: Xinzi Cao,

Ke Chen,

Feidiao Yang,

Xiawu Zheng,

Yonghong Tian,

Yutong Lu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xinzi and Chen, Ke and Yang, Feidiao and Zheng, Xiawu and Tian, Yonghong and Lu, Yutong},
  title     = {{AllGCD}: Leveraging All Unlabeled Data for Generalized Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3293--3303},
}
Principles of Visual Tokens for Efficient Video Understanding: Xinyue Hao,

Gen Li,

Shreyank N Gowda,

Robert B. Fisher,

Jonathan Huang,

Anurag Arnab,

Laura Sevilla-Lara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_ICCV,
  author    = {Hao, Xinyue and Li, Gen and Gowda, Shreyank N and Fisher, Robert B. and Huang, Jonathan and Arnab, Anurag and Sevilla-Lara, Laura},
  title     = {Principles of Visual Tokens for Efficient Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21254--21264},
}
Boosting Domain Generalized and Adaptive Detection with Diffusion Models: Fitness, Generalization, and Transferability: Boyong He,

Yuxiang Ji,

Zhuoyue Tan,

Liaoni Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Boyong and Ji, Yuxiang and Tan, Zhuoyue and Wu, Liaoni},
  title     = {Boosting Domain Generalized and Adaptive Detection with Diffusion Models: Fitness, Generalization, and Transferability},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1912--1923},
}
Power of Cooperative Supervision: Multiple Teachers Framework for Advanced 3D Semi-Supervised Object Detection: Jin-Hee Lee,

Jae-Keun Lee,

Jeseok Kim,

Kwon Soon; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jin-Hee and Lee, Jae-Keun and Kim, Jeseok and Soon, Kwon},
  title     = {Power of Cooperative Supervision: Multiple Teachers Framework for Advanced {3D} Semi-Supervised Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6994--7003},
}
SAM Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures: Yi Qin,

Rui Wang,

Tao Huang,

Tong Xiao,

Liping Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yi and Wang, Rui and Huang, Tao and Xiao, Tong and Jing, Liping},
  title     = {{SAM} Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10624--10634},
}
ReassembleNet: Learnable Keypoints and Diffusion for 2D Fresco Reconstruction: Adeela Islam,

Stefano Fiorini,

Stuart James,

Pietro Morerio,

Alessio Del Bue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Islam_2025_ICCV,
  author    = {Islam, Adeela and Fiorini, Stefano and James, Stuart and Morerio, Pietro and Del Bue, Alessio},
  title     = {{ReassembleNet}: Learnable Keypoints and Diffusion for {2D} Fresco Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9048--9057},
}
Similarity Memory Prior is All You Need for Medical Image Segmentation: Hao Tang,

Zhiqing Guo,

Liejun Wang,

Chao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Hao and Guo, Zhiqing and Wang, Liejun and Liu, Chao},
  title     = {Similarity Memory Prior is All You Need for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23009--23018},
}
MMGeo: Multimodal Compositional Geo-Localization for UAVs: Yuxiang Ji,

Boyong He,

Zhuoyue Tan,

Liaoni Wu; [pdf]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yuxiang and He, Boyong and Tan, Zhuoyue and Wu, Liaoni},
  title     = {{MMGeo}: Multimodal Compositional Geo-Localization for {UAVs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25165--25175},
}
Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection: Yichen Lu,

Siwei Nie,

Minlong Lu,

Xudong Yang,

Xiaobo Zhang,

Peng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yichen and Nie, Siwei and Lu, Minlong and Yang, Xudong and Zhang, Xiaobo and Zhang, Peng},
  title     = {Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19248--19257},
}
InstructSeg: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models: Cong Wei,

Yujie Zhong,

Haoxian Tan,

Yingsen Zeng,

Yong Liu,

Hongfa Wang,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Zeng, Yingsen and Liu, Yong and Wang, Hongfa and Yang, Yujiu},
  title     = {{InstructSeg}: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20193--20203},
}
Multispectral Demosaicing via Dual Cameras: SaiKiran Tedla,

Junyong Lee,

Beixuan Yang,

Mahmoud Afifi,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tedla_2025_ICCV,
  author    = {Tedla, SaiKiran and Lee, Junyong and Yang, Beixuan and Afifi, Mahmoud and Brown, Michael S.},
  title     = {Multispectral Demosaicing via Dual Cameras},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5405--5414},
}
Reflect-DiT: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection: Shufan Li,

Konstantinos Kallidromitis,

Akash Gokul,

Arsh Koneru,

Yusuke Kato,

Kazuki Kozuka,

Aditya Grover; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Koneru, Arsh and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya},
  title     = {{Reflect-DiT}: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15657--15668},
}
HAMoBE: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person ReID: Yiyang Su,

Yunping Shi,

Feng Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Yiyang and Shi, Yunping and Liu, Feng and Liu, Xiaoming},
  title     = {{HAMoBE}: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person {ReID}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11525--11536},
}
TopicGeo: An Efficient Unified Framework for Geolocation: Xin Wang,

Xinlin Wang,

Shuiping Gou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xin and Wang, Xinlin and Gou, Shuiping},
  title     = {{TopicGeo}: An Efficient Unified Framework for Geolocation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8241--8251},
}
Neighboring Autoregressive Modeling for Efficient Visual Generation: Yefei He,

Yuanyu He,

Shaoxuan He,

Feng Chen,

Hong Zhou,

Kaipeng Zhang,

Bohan Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Yefei and He, Yuanyu and He, Shaoxuan and Chen, Feng and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan},
  title     = {Neighboring Autoregressive Modeling for Efficient Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19000--19010},
}
Beyond Simple Edits: Composed Video Retrieval with Dense Modifications: Omkar Thawakar,

Dmitry Demidov,

Ritesh Thawkar,

Rao Muhammad Anwer,

Mubarak Shah,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thawakar_2025_ICCV,
  author    = {Thawakar, Omkar and Demidov, Dmitry and Thawkar, Ritesh and Anwer, Rao Muhammad and Shah, Mubarak and Khan, Fahad Shahbaz and Khan, Salman},
  title     = {Beyond Simple Edits: Composed Video Retrieval with Dense Modifications},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20435--20444},
}
MagicColor: Multi-Instance Sketch Colorization: Yinhan Zhang,

Yue Ma,

Bingyuan Wang,

Qifeng Chen,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yinhan and Ma, Yue and Wang, Bingyuan and Chen, Qifeng and Wang, Zeyu},
  title     = {{MagicColor}: Multi-Instance Sketch Colorization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15205--15217},
}
Active Learning Meets Foundation Models: Fast Remote Sensing Data Annotation for Object Detection: Marvin Burges,

Philipe Ambrozio Dias,

Carson Woody,

Sarah Walters,

Dalton Lunga; [pdf]
[bibtex]
@InProceedings{Burges_2025_ICCV,
  author    = {Burges, Marvin and Dias, Philipe Ambrozio and Woody, Carson and Walters, Sarah and Lunga, Dalton},
  title     = {Active Learning Meets Foundation Models: Fast Remote Sensing Data Annotation for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6058--6068},
}
Im2Haircut: Single-view Strand-based Hair Reconstruction for Human Avatars: Vanessa Sklyarova,

Egor Zakharov,

Malte Prinzler,

Giorgio Becherini,

Michael J. Black,

Justus Thies; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sklyarova_2025_ICCV,
  author    = {Sklyarova, Vanessa and Zakharov, Egor and Prinzler, Malte and Becherini, Giorgio and Black, Michael J. and Thies, Justus},
  title     = {{Im2Haircut}: Single-view Strand-based Hair Reconstruction for Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10656--10665},
}
Leveraging Prior Knowledge of Diffusion Model for Person Search: Giyeol Kim,

Sooyoung Yang,

Jihyong Oh,

Myungjoo Kang,

Chanho Eom; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Giyeol and Yang, Sooyoung and Oh, Jihyong and Kang, Myungjoo and Eom, Chanho},
  title     = {Leveraging Prior Knowledge of Diffusion Model for Person Search},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20301--20312},
}
PS3: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction: Manahil Raza,

Ayesha Azam,

Talha Qaiser,

Nasir Rajpoot; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raza_2025_ICCV,
  author    = {Raza, Manahil and Azam, Ayesha and Qaiser, Talha and Rajpoot, Nasir},
  title     = {{PS3}: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22175--22186},
}
Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection: Xingjian Wang,

Li Chai,

Jiming Chen; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xingjian and Chai, Li and Chen, Jiming},
  title     = {Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22989--22998},
}
EDM: Efficient Deep Feature Matching: Xi Li,

Tong Rao,

Cihui Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xi and Rao, Tong and Pan, Cihui},
  title     = {{EDM}: Efficient Deep Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26198--26208},
}
Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces: Aniruddha Mahapatra,

Long Mai,

David Bourgin,

Yitian Zhang,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mahapatra_2025_ICCV,
  author    = {Mahapatra, Aniruddha and Mai, Long and Bourgin, David and Zhang, Yitian and Liu, Feng},
  title     = {Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17629--17639},
}
Stronger, Steadier & Superior: Geometric Consistency in Depth VFM Forges Domain Generalized Semantic Segmentation: Siyu Chen,

Ting Han,

Changshe Zhang,

Xin Luo,

Meiliu Wu,

Guorong Cai,

Jinhe Su; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Siyu and Han, Ting and Zhang, Changshe and Luo, Xin and Wu, Meiliu and Cai, Guorong and Su, Jinhe},
  title     = {Stronger, Steadier \& Superior: Geometric Consistency in Depth {VFM} Forges Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8285--8295},
}
CorrCLIP: Reconstructing Patch Correlations in CLIP for Open-Vocabulary Semantic Segmentation: Dengke Zhang,

Fagui Liu,

Quan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Dengke and Liu, Fagui and Tang, Quan},
  title     = {{CorrCLIP}: Reconstructing Patch Correlations in {CLIP} for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24677--24687},
}
LBM: Latent Bridge Matching for Fast Image-to-Image Translation: Clément Chadebec,

Onur Tasar,

Sanjeev Sreetharan,

Benjamin Aubin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chadebec_2025_ICCV,
  author    = {Chadebec, Cl{\'e}ment and Tasar, Onur and Sreetharan, Sanjeev and Aubin, Benjamin},
  title     = {{LBM}: Latent Bridge Matching for Fast Image-to-Image Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29086--29098},
}
DreamRenderer: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models: Dewei Zhou,

Mingwei Li,

Zongxin Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Dewei and Li, Mingwei and Yang, Zongxin and Yang, Yi},
  title     = {{DreamRenderer}: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16712--16722},
}
Towards a 3D Transfer-based Black-box Attack via Critical Feature Guidance: Shuchao Pang,

Zhenghan Chen,

Shen Zhang,

Liming Lu,

Siyuan Liang,

Anan Du,

Yongbin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_ICCV,
  author    = {Pang, Shuchao and Chen, Zhenghan and Zhang, Shen and Lu, Liming and Liang, Siyuan and Du, Anan and Zhou, Yongbin},
  title     = {Towards a {3D} Transfer-based Black-box Attack via Critical Feature Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26912--26922},
}
TurboVSR: Fantastic Video Upscalers and Where to Find Them: Zhongdao Wang,

Guodongfang Zhao,

Jingjing Ren,

Bailan Feng,

Shifeng Zhang,

Wenbo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhongdao and Zhao, Guodongfang and Ren, Jingjing and Feng, Bailan and Zhang, Shifeng and Li, Wenbo},
  title     = {{TurboVSR}: Fantastic Video Upscalers and Where to Find Them},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18132--18142},
}
LLM-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection: Wei Liao,

Chunyan Xu,

Chenxu Wang,

Zhen Cui; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Wei and Xu, Chunyan and Wang, Chenxu and Cui, Zhen},
  title     = {{LLM}-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22519--22528},
}
NuPlanQA: A Large-Scale Dataset and Benchmark for Multi-View Driving Scene Understanding in Multi-Modal Large Language Models: Sung-Yeon Park,

Can Cui,

Yunsheng Ma,

Ahmadreza Moradipari,

Rohit Gupta,

Kyungtae Han,

Ziran Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sung-Yeon and Cui, Can and Ma, Yunsheng and Moradipari, Ahmadreza and Gupta, Rohit and Han, Kyungtae and Wang, Ziran},
  title     = {{NuPlanQA}: A Large-Scale Dataset and Benchmark for Multi-View Driving Scene Understanding in Multi-Modal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8066--8076},
}
Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories: Jingqiao Xiu,

Yicong Li,

Na Zhao,

Han Fang,

Xiang Wang,

Angela Yao; [pdf]
[bibtex]
@InProceedings{Xiu_2025_ICCV,
  author    = {Xiu, Jingqiao and Li, Yicong and Zhao, Na and Fang, Han and Wang, Xiang and Yao, Angela},
  title     = {Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27435--27444},
}
ODDR: Outlier Detection & Dimension Reduction Based Defense Against Adversarial Patches: Nandish Chattopadhyay,

Amira Guesmi,

Muhammad Abdullah Hanif,

Bassem Ouni,

Muhammad Shafique; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chattopadhyay_2025_ICCV,
  author    = {Chattopadhyay, Nandish and Guesmi, Amira and Hanif, Muhammad Abdullah and Ouni, Bassem and Shafique, Muhammad},
  title     = {{ODDR}: Outlier Detection \& Dimension Reduction Based Defense Against Adversarial Patches},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22999--23008},
}
Mind the Gap: Preserving and Compensating for the Modality Gap in CLIP-Based Continual Learning: Linlan Huang,

Xusheng Cao,

Haori Lu,

Yifan Meng,

Fei Yang,

Xialei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Linlan and Cao, Xusheng and Lu, Haori and Meng, Yifan and Yang, Fei and Liu, Xialei},
  title     = {Mind the Gap: Preserving and Compensating for the Modality Gap in {CLIP}-Based Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3777--3786},
}
Stochastic Gradient Estimation for Higher-Order Differentiable Rendering: Zican Wang,

Michael Fischer,

Tobias Ritschel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zican and Fischer, Michael and Ritschel, Tobias},
  title     = {Stochastic Gradient Estimation for Higher-Order Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28198--28206},
}
PolGS: Polarimetric Gaussian Splatting for Fast Reflective Surface Reconstruction: Yufei Han,

Bowen Tie,

Heng Guo,

Youwei Lyu,

Si Li,

Boxin Shi,

Yunpeng Jia,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Yufei and Tie, Bowen and Guo, Heng and Lyu, Youwei and Li, Si and Shi, Boxin and Jia, Yunpeng and Ma, Zhanyu},
  title     = {{PolGS}: Polarimetric {Gaussian} Splatting for Fast Reflective Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28073--28082},
}
AdvDreamer Unveils: Are Vision-Language Models Truly Ready for Real-World 3D Variations?: Shouwei Ruan,

Hanqing Liu,

Yao Huang,

Xiaoqi Wang,

Caixin Kang,

Hang Su,

Yinpeng Dong,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2025_ICCV,
  author    = {Ruan, Shouwei and Liu, Hanqing and Huang, Yao and Wang, Xiaoqi and Kang, Caixin and Su, Hang and Dong, Yinpeng and Wei, Xingxing},
  title     = {{AdvDreamer} Unveils: Are Vision-Language Models Truly Ready for Real-World {3D} Variations?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7894--7904},
}
ERNet: Efficient Non-Rigid Registration Network for Point Sequences: Guangzhao He,

Yuxi Xiao,

Zhen Xu,

Xiaowei Zhou,

Sida Peng; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Guangzhao and Xiao, Yuxi and Xu, Zhen and Zhou, Xiaowei and Peng, Sida},
  title     = {{ERNet}: Efficient Non-Rigid Registration Network for Point Sequences},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27156--27165},
}
Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?: Tianyuan Qu,

Longxiang Tang,

Bohao Peng,

Senqiao Yang,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
  author    = {Qu, Tianyuan and Tang, Longxiang and Peng, Bohao and Yang, Senqiao and Yu, Bei and Jia, Jiaya},
  title     = {Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20889--20899},
}
Not all Views are Created Equal: Analyzing Viewpoint Instabilities in Vision Foundation Models: Mateusz Michalkiewicz,

Sheena Bai,

Mahsa Baktashmotlagh,

Varun Jampani,

Guha Balakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Michalkiewicz_2025_ICCV,
  author    = {Michalkiewicz, Mateusz and Bai, Sheena and Baktashmotlagh, Mahsa and Jampani, Varun and Balakrishnan, Guha},
  title     = {Not all Views are Created Equal: Analyzing Viewpoint Instabilities in Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9113--9123},
}
HumorDB: Can AI understand graphical humor?: Vedaant V Jain,

Gabriel Kreiman,

Felipe dos Santos Alves Feitosa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2025_ICCV,
  author    = {Jain, Vedaant V and Kreiman, Gabriel and dos Santos Alves Feitosa, Felipe},
  title     = {{HumorDB}: Can {AI} understand graphical humor?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {604--613},
}
Towards Safer and Understandable Driver Intention Prediction: Mukilan Karuppasamy,

Shankar Gangisetty,

Shyam Nandan Rai,

Carlo Masone,

C V Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karuppasamy_2025_ICCV,
  author    = {Karuppasamy, Mukilan and Gangisetty, Shankar and Rai, Shyam Nandan and Masone, Carlo and Jawahar, C V},
  title     = {Towards Safer and Understandable Driver Intention Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25378--25387},
}
Generative Gaussian Splatting: Generating 3D Scenes with Video Diffusion Priors: Katja Schwarz,

Norman Müller,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Schwarz_2025_ICCV,
  author    = {Schwarz, Katja and M{\"u}ller, Norman and Kontschieder, Peter},
  title     = {Generative {Gaussian} Splatting: Generating {3D} Scenes with Video Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27510--27520},
}
AffordDexGrasp: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance: Yi-Lin Wei,

Mu Lin,

Yuhao Lin,

Jian-Jian Jiang,

Xiao-Ming Wu,

Ling-An Zeng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yi-Lin and Lin, Mu and Lin, Yuhao and Jiang, Jian-Jian and Wu, Xiao-Ming and Zeng, Ling-An and Zheng, Wei-Shi},
  title     = {{AffordDexGrasp}: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11818--11828},
}
AR-1-to-3: Single Image to Consistent 3D Object via Next-View Prediction: Xuying Zhang,

Yupeng Zhou,

Kai Wang,

Yikai Wang,

Zhen Li,

Shaohui Jiao,

Daquan Zhou,

Qibin Hou,

Ming-Ming Cheng; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xuying and Zhou, Yupeng and Wang, Kai and Wang, Yikai and Li, Zhen and Jiao, Shaohui and Zhou, Daquan and Hou, Qibin and Cheng, Ming-Ming},
  title     = {{AR-1-to-3}: Single Image to Consistent {3D} Object via Next-View Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26273--26283},
}
Inverse 3D Microscopy Rendering for Cell Shape Inference with Active Mesh: Sacha Ichbiah,

Anshuman Sinha,

Fabrice Delbary,

Hervé Turlier; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ichbiah_2025_ICCV,
  author    = {Ichbiah, Sacha and Sinha, Anshuman and Delbary, Fabrice and Turlier, Herv{\'e}},
  title     = {Inverse {3D} Microscopy Rendering for Cell Shape Inference with Active Mesh},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26987--26998},
}
NeurOp-Diff: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion: Zihao Xu,

Yuzhi Tang,

Bowen Xu,

Qingquan Li; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zihao and Tang, Yuzhi and Xu, Bowen and Li, Qingquan},
  title     = {{NeurOp-Diff}: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12491--12501},
}
DC-TTA: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation: Jihun Kim,

Hoyong Kwon,

Hyeokjun Kweon,

Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Jeong, Wooseong and Yoon, Kuk-Jin},
  title     = {{DC-TTA}: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23279--23289},
}
PROL : Rehearsal Free Continual Learning in Streaming Data via Prompt Online Learning: M. Anwar Ma'sum,

Mahardhika Pratama,

Savitha Ramasamy,

Lin Liu,

Habibullah Habibullah,

Ryszard Kowalczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma'sum_2025_ICCV,
  author    = {Ma'sum, M. Anwar and Pratama, Mahardhika and Ramasamy, Savitha and Liu, Lin and Habibullah, Habibullah and Kowalczyk, Ryszard},
  title     = {{PROL} : Rehearsal Free Continual Learning in Streaming Data via Prompt Online Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2471--2481},
}
Generalization-Preserved Learning: Closing the Backdoor to Catastrophic Forgetting in Continual Deepfake Detection: Xueyi Zhang,

Peiyin Zhu,

Chengwei Zhang,

Zhiyuan Yan,

Jikang Cheng,

Mingrui Lao,

Siqi Cai,

Yanming Guo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xueyi and Zhu, Peiyin and Zhang, Chengwei and Yan, Zhiyuan and Cheng, Jikang and Lao, Mingrui and Cai, Siqi and Guo, Yanming},
  title     = {Generalization-Preserved Learning: Closing the Backdoor to Catastrophic Forgetting in Continual Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3798--3808},
}
JailbreakDiffBench: A Comprehensive Benchmark for Jailbreaking Diffusion Models: Xiaolong Jin,

Zixuan Weng,

Hanxi Guo,

Chenlong Yin,

Siyuan Cheng,

Guangyu Shen,

Xiangyu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Xiaolong and Weng, Zixuan and Guo, Hanxi and Yin, Chenlong and Cheng, Siyuan and Shen, Guangyu and Zhang, Xiangyu},
  title     = {{JailbreakDiffBench}: A Comprehensive Benchmark for Jailbreaking Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16461--16471},
}
InfiniteYou: Flexible Photo Recrafting While Preserving Your Identity: Liming Jiang,

Qing Yan,

Yumin Jia,

Zichuan Liu,

Hao Kang,

Xin Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Liming and Yan, Qing and Jia, Yumin and Liu, Zichuan and Kang, Hao and Lu, Xin},
  title     = {{InfiniteYou}: Flexible Photo Recrafting While Preserving Your Identity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10898--10907},
}
GeoMan: Temporally Consistent Human Geometry Estimation using Image-to-Video Diffusion: Gwanghyun Kim,

Xueting Li,

Ye Yuan,

Koki Nagano,

Tianye Li,

Jan Kautz,

Se Young Chun,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Gwanghyun and Li, Xueting and Yuan, Ye and Nagano, Koki and Li, Tianye and Kautz, Jan and Chun, Se Young and Iqbal, Umar},
  title     = {{GeoMan}: Temporally Consistent Human Geometry Estimation using Image-to-Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7451--7461},
}
NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning: Zhixi Cai,

Fucai Ke,

Simindokht Jahangard,

Maria Garcia de la Banda,

Reza Haffari,

Peter J. Stuckey,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Zhixi and Ke, Fucai and Jahangard, Simindokht and de la Banda, Maria Garcia and Haffari, Reza and Stuckey, Peter J. and Rezatofighi, Hamid},
  title     = {{NAVER}: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24078--24089},
}
Generative Adversarial Diffusion: U-Chae Jun,

Jaeeun Ko,

Jiwoo Kang; [pdf] [supp]
[bibtex]
@InProceedings{Jun_2025_ICCV,
  author    = {Jun, U-Chae and Ko, Jaeeun and Kang, Jiwoo},
  title     = {Generative Adversarial Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16786--16796},
}
GaussianVideo: Efficient Video Representation via Hierarchical Gaussian Splatting: Andrew Bond,

Jui-Hsien Wang,

Long Mai,

Erkut Erdem,

Aykut Erdem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bond_2025_ICCV,
  author    = {Bond, Andrew and Wang, Jui-Hsien and Mai, Long and Erdem, Erkut and Erdem, Aykut},
  title     = {{GaussianVideo}: Efficient Video Representation via Hierarchical {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7187--7196},
}
MergeOcc: Bridge the Domain Gap between Different LiDARs for Robust Occupancy Prediction: Zikun Xu,

Shaobing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zikun and Xu, Shaobing},
  title     = {{MergeOcc}: Bridge the Domain Gap between Different {LiDARs} for Robust Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26539--26548},
}
AutoScape: Geometry-Consistent Long-Horizon Scene Generation: Jiacheng Chen,

Ziyu Jiang,

Mingfu Liang,

Bingbing Zhuang,

Jong-Chyi Su,

Sparsh Garg,

Ying Wu,

Manmohan Chandraker; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiacheng and Jiang, Ziyu and Liang, Mingfu and Zhuang, Bingbing and Su, Jong-Chyi and Garg, Sparsh and Wu, Ying and Chandraker, Manmohan},
  title     = {{AutoScape}: Geometry-Consistent Long-Horizon Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25700--25711},
}
Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models: Xudong Li,

Zihao Huang,

Yan Zhang,

Yunhang Shen,

Ke Li,

Xiawu Zheng,

Liujuan Cao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xudong and Huang, Zihao and Zhang, Yan and Shen, Yunhang and Li, Ke and Zheng, Xiawu and Cao, Liujuan and Ji, Rongrong},
  title     = {Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10442--10452},
}
UIP2P: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint: Enis Simsar,

Alessio Tonioni,

Yongqin Xian,

Thomas Hofmann,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simsar_2025_ICCV,
  author    = {Simsar, Enis and Tonioni, Alessio and Xian, Yongqin and Hofmann, Thomas and Tombari, Federico},
  title     = {{UIP2P}: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18895--18905},
}
Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection: Taehoon Kim,

Jongwook Choi,

Yonghyun Jeong,

Haeun Noh,

Jaejun Yoo,

Seungryul Baek,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Taehoon and Choi, Jongwook and Jeong, Yonghyun and Noh, Haeun and Yoo, Jaejun and Baek, Seungryul and Choi, Jongwon},
  title     = {Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11198--11207},
}
Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training: Wooseong Jeong,

Jegyeong Cho,

Youngho Yoon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Wooseong and Cho, Jegyeong and Yoon, Youngho and Yoon, Kuk-Jin},
  title     = {Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24340--24350},
}
FLOSS: Free Lunch in Open-vocabulary Semantic Segmentation: Yasser Benigmim,

Mohammad Fahes,

Tuan-Hung Vu,

Andrei Bursuc,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benigmim_2025_ICCV,
  author    = {Benigmim, Yasser and Fahes, Mohammad and Vu, Tuan-Hung and Bursuc, Andrei and de Charette, Raoul},
  title     = {{FLOSS}: Free Lunch in Open-vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21471--21481}
}
p-MoD: Building Mixture-of-Depths MLLMs via Progressive Ratio Decay: Jun Zhang,

Desen Meng,

Zhengming Zhang,

Zhenpeng Huang,

Tao Wu,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jun and Meng, Desen and Zhang, Zhengming and Huang, Zhenpeng and Wu, Tao and Wang, Limin},
  title     = {{p-MoD}: Building Mixture-of-Depths {MLLMs} via Progressive Ratio Decay},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3705--3715}
}
SAME: Learning Generic Language-Guided Visual Navigation with State-Adaptive Mixture of Experts: Gengze Zhou,

Yicong Hong,

Zun Wang,

Chongyang Zhao,

Mohit Bansal,

Qi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Gengze and Hong, Yicong and Wang, Zun and Zhao, Chongyang and Bansal, Mohit and Wu, Qi},
  title     = {{SAME}: Learning Generic Language-Guided Visual Navigation with State-Adaptive Mixture of Experts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7794--7807}
}
ToF-Splatting: Dense SLAM using Sparse Time-of-Flight Depth and Multi-Frame Integration: Andrea Conti,

Matteo Poggi,

Valerio Cambareri,

Martin R. Oswald,

Stefano Mattoccia; [pdf] [supp]
[bibtex]
@InProceedings{Conti_2025_ICCV,
  author    = {Conti, Andrea and Poggi, Matteo and Cambareri, Valerio and Oswald, Martin R. and Mattoccia, Stefano},
  title     = {{ToF-Splatting}: Dense {SLAM} using Sparse Time-of-Flight Depth and Multi-Frame Integration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28344--28353}
}
PASD: A Pixel-Adaptive Swarm Dynamics Approach for Unsupervised Low-Light Image Enhancement: Shuai Jin,

Yuhua Qian,

Feijiang Li,

Guoqing Liu,

Xinyan Liang; [pdf]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Shuai and Qian, Yuhua and Li, Feijiang and Liu, Guoqing and Liang, Xinyan},
  title     = {{PASD}: A Pixel-Adaptive Swarm Dynamics Approach for Unsupervised Low-Light Image Enhancement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9070--9079}
}
ConsNoTrainLoRA: Data-driven Weight Initialization of Low-rank Adapters using Constraints: Debasmit Das,

Hyoungwoo Park,

Munawar Hayat,

Seokeon Choi,

Sungrack Yun,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2025_ICCV,
  author    = {Das, Debasmit and Park, Hyoungwoo and Hayat, Munawar and Choi, Seokeon and Yun, Sungrack and Porikli, Fatih},
  title     = {{ConsNoTrainLoRA}: Data-driven Weight Initialization of Low-rank Adapters using Constraints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {498--507}
}
Correspondence-Free Fast and Robust Spherical Point Pattern Registration: Anik Sarker,

Alan T. Asbeck; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarker_2025_ICCV,
  author    = {Sarker, Anik and Asbeck, Alan T.},
  title     = {Correspondence-Free Fast and Robust Spherical Point Pattern Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28156--28166}
}
Adversarial Robustness of Discriminative Self-Supervised Learning in Vision: Ömer Veysel Çağatan,

Ömer Faruk Tal,

M. Emre Gursoy; [pdf] [supp]
[bibtex]
@InProceedings{Cagatan_2025_ICCV,
  author    = {{\c{C}}a{\u{g}}atan, {\"O}mer Veysel and Tal, {\"O}mer Faruk and Gursoy, M. Emre},
  title     = {Adversarial Robustness of Discriminative Self-Supervised Learning in Vision},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2313--2324}
}
CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling: Trong Thang Pham,

Akash Awasthi,

Saba Khan,

Esteban Duran Marti,

Tien-Phat Nguyen,

Khoa Vo,

Minh Tran,

Son Nguyen,

Cuong Tran,

Yuki Ikebe,

Anh Totti Nguyen,

Anh Nguyen,

Zhigang Deng,

Carol C. Wu,

Hien Nguyen,

Ngan Le; [pdf] [supp]
[bibtex]
@InProceedings{Pham_2025_ICCV,
  author    = {Pham, Trong Thang and Awasthi, Akash and Khan, Saba and Marti, Esteban Duran and Nguyen, Tien-Phat and Vo, Khoa and Tran, Minh and Nguyen, Son and Tran, Cuong and Ikebe, Yuki and Nguyen, Anh Totti and Nguyen, Anh and Deng, Zhigang and Wu, Carol C. and Nguyen, Hien and Le, Ngan},
  title     = {{CT-ScanGaze}: A Dataset and Baselines for {3D} Volumetric Scanpath Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21732--21743}
}
MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning: Mattia Segu,

Marta Tintore Gazulla,

Yongqin Xian,

Luc Van Gool,

Federico Tombari; [pdf] [supp]
[bibtex]
@InProceedings{Segu_2025_ICCV,
  author    = {Segu, Mattia and Gazulla, Marta Tintore and Xian, Yongqin and Van Gool, Luc and Tombari, Federico},
  title     = {{MOBIUS}: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20726--20736}
}
VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data: Jian Shi,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Jian and Wonka, Peter},
  title     = {{VoxelKP}: A Voxel-based Network Architecture for Human Keypoint Estimation in {LiDAR} Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28282--28291}
}
TemCoCo: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration: Meiqi Gong,

Hao Zhang,

Xunpeng Yi,

Linfeng Tang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Meiqi and Zhang, Hao and Yi, Xunpeng and Tang, Linfeng and Ma, Jiayi},
  title     = {{TemCoCo}: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14326--14335}
}
Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images: Jinsol Song,

Jiamu Wang,

Anh Tien Nguyen,

Keunho Byeon,

Sangjeong Ahn,

Sung Hak Lee,

Jin Tae Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Jinsol and Wang, Jiamu and Nguyen, Anh Tien and Byeon, Keunho and Ahn, Sangjeong and Lee, Sung Hak and Kwak, Jin Tae},
  title     = {Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22066--22076}
}
COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets: Lingyu Chen,

Yawen Zeng,

Yue Wang,

Peng Wan,

Guochen Ning,

Hongen Liao,

Daoqiang Zhang,

Fang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Lingyu and Zeng, Yawen and Wang, Yue and Wan, Peng and Ning, Guochen and Liao, Hongen and Zhang, Daoqiang and Chen, Fang},
  title     = {{COME}: Dual Structure-Semantic Learning with Collaborative {MoE} for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21460--21470}
}
Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation: Jiwoo Chung,

Sangeek Hyun,

Hyunjun Kim,

Eunseo Koh,

MinKyu Lee,

Jae-Pil Heo; [pdf] [supp]
[bibtex]
@InProceedings{Chung_2025_ICCV,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Kim, Hyunjun and Koh, Eunseo and Lee, MinKyu and Heo, Jae-Pil},
  title     = {Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19174--19184}
}
Voyaging into Perpetual Dynamic Scenes from a Single View: Fengrui Tian,

Tianjiao Ding,

Jinqi Luo,

Hancheng Min,

Rene Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Fengrui and Ding, Tianjiao and Luo, Jinqi and Min, Hancheng and Vidal, Rene},
  title     = {Voyaging into Perpetual Dynamic Scenes from a Single View},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7698--7708}
}
Accelerating Diffusion Transformer via Gradient-Optimized Cache: Junxiang Qiu,

Lin Liu,

Shuo Wang,

Jinda Lu,

Kezhou Chen,

Yanbin Hao; [pdf] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Junxiang and Liu, Lin and Wang, Shuo and Lu, Jinda and Chen, Kezhou and Hao, Yanbin},
  title     = {Accelerating Diffusion Transformer via Gradient-Optimized Cache},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17608--17617}
}
SceneSplat: Gaussian Splatting-based Scene Understanding with Vision-Language Pretraining: Yue Li,

Qi Ma,

Runyi Yang,

Huapeng Li,

Mengjiao Ma,

Bin Ren,

Nikola Popovic,

Nicu Sebe,

Ender Konukoglu,

Theo Gevers,

Luc Van Gool,

Martin R. Oswald,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yue and Ma, Qi and Yang, Runyi and Li, Huapeng and Ma, Mengjiao and Ren, Bin and Popovic, Nikola and Sebe, Nicu and Konukoglu, Ender and Gevers, Theo and Van Gool, Luc and Oswald, Martin R. and Paudel, Danda Pani},
  title     = {{SceneSplat}: {Gaussian} Splatting-based Scene Understanding with Vision-Language Pretraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4961--4972}
}
Beyond the Limits: Overcoming Negative Correlation of Activation-Based Training-Free NAS: Haidong Kang,

Lianbo Ma,

Pengjun Chen,

Guo Yu,

Xingwei Wang,

Min Huang; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Haidong and Ma, Lianbo and Chen, Pengjun and Yu, Guo and Wang, Xingwei and Huang, Min},
  title     = {Beyond the Limits: Overcoming Negative Correlation of Activation-Based Training-Free {NAS}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {796--805}
}
Unknown Text Learning for CLIP-based Few-Shot Open-set Recognition: Rui Ma,

Qilong Wang,

Bing Cao,

Qinghua Hu,

Yahong Han; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Rui and Wang, Qilong and Cao, Bing and Hu, Qinghua and Han, Yahong},
  title     = {Unknown Text Learning for {CLIP}-based Few-Shot Open-set Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {657--667}
}
VoteSplat: Hough Voting Gaussian Splatting for 3D Scene Understanding: Minchao Jiang,

Shunyu Jia,

Jiaming Gu,

Xiaoyuan Lu,

Guangming Zhu,

Anqi Dong,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Minchao and Jia, Shunyu and Gu, Jiaming and Lu, Xiaoyuan and Zhu, Guangming and Dong, Anqi and Zhang, Liang},
  title     = {{VoteSplat}: {Hough} Voting {Gaussian} Splatting for {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6456--6465}
}
C4D: 4D Made from 3D through Dual Correspondences: Shizun Wang,

Zhenxiang Jiang,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shizun and Jiang, Zhenxiang and Yang, Xingyi and Wang, Xinchao},
  title     = {{C4D}: {4D} Made from {3D} through Dual Correspondences},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7570--7580}
}
An Efficient Hybrid Vision Transformer for TinyML Applications: Fanhong Zeng,

Huanan Li,

Juntao Guan,

Rui Fan,

Tong Wu,

Xilong Wang,

Rui Lai; [pdf]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Fanhong and Li, Huanan and Guan, Juntao and Fan, Rui and Wu, Tong and Wang, Xilong and Lai, Rui},
  title     = {An Efficient Hybrid Vision Transformer for {TinyML} Applications},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19914--19924}
}
Object-centric Video Question Answering with Visual Grounding and Referring: Haochen Wang,

Qirui Chen,

Cilin Yan,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Weidi Xie,

Stratis Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haochen and Chen, Qirui and Yan, Cilin and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi and Gavves, Stratis},
  title     = {Object-centric Video Question Answering with Visual Grounding and Referring},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22274--22284}
}
Face Retouching with Diffusion Data Generation and Spectral Restorement: Zhidan Xu,

Xiaoqin Zhang,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhidan and Zhang, Xiaoqin and Lu, Shijian},
  title     = {Face Retouching with Diffusion Data Generation and Spectral Restorement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14722--14731}
}
AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing: Zhaonan Wang,

Manyi Li,

Changhe Tu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhaonan and Li, Manyi and Tu, Changhe},
  title     = {{AG2aussian}: Anchor-Graph Structured {Gaussian} Splatting for Instance-Level {3D} Scene Understanding and Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26806--26816}
}
Beyond Blur: A Fluid Perspective on Generative Diffusion Models: Grzegorz Gruszczynski,

Jakub Meixner,

Michal Wlodarczyk,

Przemyslaw Musialski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gruszczynski_2025_ICCV,
  author    = {Gruszczynski, Grzegorz and Meixner, Jakub and Wlodarczyk, Michal and Musialski, Przemyslaw},
  title     = {Beyond Blur: A Fluid Perspective on Generative Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17818--17827}
}
CanonSwap: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation: Xiangyang Luo,

Ye Zhu,

Yunfei Liu,

Lijian Lin,

Cong Wan,

Zijian Cai,

Yu Li,

Shao-Lun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Xiangyang and Zhu, Ye and Liu, Yunfei and Lin, Lijian and Wan, Cong and Cai, Zijian and Li, Yu and Huang, Shao-Lun},
  title     = {{CanonSwap}: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10064--10074}
}
Soft Local Completeness: Rethinking Completeness in XAI: Ziv Weiss Haddad,

Oren Barkan,

Yehonatan Elisha,

Noam Koenigstein; [pdf] [supp]
[bibtex]
@InProceedings{Haddad_2025_ICCV,
  author    = {Haddad, Ziv Weiss and Barkan, Oren and Elisha, Yehonatan and Koenigstein, Noam},
  title     = {Soft Local Completeness: Rethinking Completeness in {XAI}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19794--19804}
}
Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions: Liang Xu,

Chengqun Yang,

Zili Lin,

Fei Xu,

Yifan Liu,

Congsheng Xu,

Yiyi Zhang,

Jie Qin,

Xingdong Sheng,

Yunhui Liu,

Xin Jin,

Yichao Yan,

Wenjun Zeng,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Liang and Yang, Chengqun and Lin, Zili and Xu, Fei and Liu, Yifan and Xu, Congsheng and Zhang, Yiyi and Qin, Jie and Sheng, Xingdong and Liu, Yunhui and Jin, Xin and Yan, Yichao and Zeng, Wenjun and Yang, Xiaokang},
  title     = {Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12535--12548}
}
Open-ended Hierarchical Streaming Video Understanding with Vision Language Models: Hyolim Kang,

Yunsu Park,

Youngbeom Yoo,

Yeeun Choi,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Hyolim and Park, Yunsu and Yoo, Youngbeom and Choi, Yeeun and Kim, Seon Joo},
  title     = {Open-ended Hierarchical Streaming Video Understanding with Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20715--20725}
}
SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions: Jessica Bader,

Leander Girrbach,

Stephan Alaniz,

Zeynep Akata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bader_2025_ICCV,
  author    = {Bader, Jessica and Girrbach, Leander and Alaniz, Stephan and Akata, Zeynep},
  title     = {{SUB}: Benchmarking {CBM} Generalization via Synthetic Attribute Substitutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23188--23198}
}
G-DexGrasp: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation: Juntao Jian,

Xiuping Liu,

Zixuan Chen,

Manyi Li,

Jian Liu,

Ruizhen Hu; [pdf] [supp]
[bibtex]
@InProceedings{Jian_2025_ICCV,
  author    = {Jian, Juntao and Liu, Xiuping and Chen, Zixuan and Li, Manyi and Liu, Jian and Hu, Ruizhen},
  title     = {{G-DexGrasp}: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11447--11457}
}
Autoregressive Denoising Score Matching is a Good Video Anomaly Detector: Hanwen Zhang,

Congqi Cao,

Qinyi Lv,

Lingtong Min,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hanwen and Cao, Congqi and Lv, Qinyi and Min, Lingtong and Zhang, Yanning},
  title     = {Autoregressive Denoising Score Matching is a Good Video Anomaly Detector},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12057--12067}
}
LUDVIG: Learning-Free Uplifting of 2D Visual Features to Gaussian Splatting Scenes: Juliette Marrie,

Romain Menegaux,

Michael Arbel,

Diane Larlus,

Julien Mairal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marrie_2025_ICCV,
  author    = {Marrie, Juliette and Menegaux, Romain and Arbel, Michael and Larlus, Diane and Mairal, Julien},
  title     = {{LUDVIG}: Learning-Free Uplifting of {2D} Visual Features to {Gaussian} Splatting Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7440--7450}
}
PARTE: Part-Guided Texturing for 3D Human Reconstruction from a Single Image: Hyeongjin Nam,

Donghwan Kim,

Gyeongsik Moon,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_ICCV,
  author    = {Nam, Hyeongjin and Kim, Donghwan and Moon, Gyeongsik and Lee, Kyoung Mu},
  title     = {{PARTE}: Part-Guided Texturing for {3D} Human Reconstruction from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8547--8557}
}
LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs: Haoran Lou,

Chunxiao Fan,

Ziyan Liu,

Yuexin Wu,

Xinliang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Haoran and Fan, Chunxiao and Liu, Ziyan and Wu, Yuexin and Wang, Xinliang},
  title     = {{LLaVA-SP}: Enhancing Visual Representation with Visual Spatial Tokens for {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22014--22024}
}
3D-MOOD: Lifting 2D to 3D for Monocular Open-Set Object Detection: Yung-Hsu Yang,

Luigi Piccinelli,

Mattia Segu,

Siyuan Li,

Rui Huang,

Yuqian Fu,

Marc Pollefeys,

Hermann Blum,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yung-Hsu and Piccinelli, Luigi and Segu, Mattia and Li, Siyuan and Huang, Rui and Fu, Yuqian and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria},
  title     = {{3D-MOOD}: Lifting {2D} to {3D} for Monocular Open-Set Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7429--7439}
}
LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching: Mengxiao Tian,

Xinxiao Wu,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Mengxiao and Wu, Xinxiao and Yang, Shuo},
  title     = {{LLM}-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20748--20757}
}
PointGAC: Geometric-Aware Codebook for Masked Point Modeling: Abiao Li,

Chenlei Lv,

Yuming Fang,

Yifan Zuo,

Jian Zhang,

Guofeng Mei; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Abiao and Lv, Chenlei and Fang, Yuming and Zuo, Yifan and Zhang, Jian and Mei, Guofeng},
  title     = {{PointGAC}: Geometric-Aware Codebook for Masked Point Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24989--24998}
}
Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction: Yunheng Li,

Yuxuan Li,

Quan-Sheng Zeng,

Wenhai Wang,

Qibin Hou,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yunheng and Li, Yuxuan and Zeng, Quan-Sheng and Wang, Wenhai and Hou, Qibin and Cheng, Ming-Ming},
  title     = {Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23795--23805}
}
CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation: Lin Sun,

Jiale Cao,

Jin Xie,

Xiaoheng Jiang,

Yanwei Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Lin and Cao, Jiale and Xie, Jin and Jiang, Xiaoheng and Pang, Yanwei},
  title     = {{CLIPer}: Hierarchically Improving Spatial Representation of {CLIP} for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23199--23209}
}
Adversarial Reconstruction Feedback for Robust Fine-grained Generalization: Shijie Wang,

Jian Shi,

Haojie Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shijie and Shi, Jian and Li, Haojie},
  title     = {Adversarial Reconstruction Feedback for Robust Fine-grained Generalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3080--3090}
}
ConceptSplit: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement: Habin Lim,

Yeongseob Won,

Juwon Seo,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Habin and Won, Yeongseob and Seo, Juwon and Park, Gyeong-Moon},
  title     = {{ConceptSplit}: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18421--18430}
}
Consistency Trajectory Matching for One-Step Generative Super-Resolution: Weiyi You,

Mingyang Zhang,

Leheng Zhang,

Xingyu Zhou,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Weiyi and Zhang, Mingyang and Zhang, Leheng and Zhou, Xingyu and Shi, Kexuan and Gu, Shuhang},
  title     = {Consistency Trajectory Matching for One-Step Generative Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12747--12756}
}
SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching: Xiangzeng Liu,

Chi Wang,

Guanglu Shi,

Xiaodong Zhang,

Qiguang Miao,

Miao Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiangzeng and Wang, Chi and Shi, Guanglu and Zhang, Xiaodong and Miao, Qiguang and Fan, Miao},
  title     = {{SGAD}: Semantic and Geometric-aware Descriptor for Local Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27095--27104}
}
SpectralAR: Spectral Autoregressive Visual Generation: Yuanhui Huang,

Weiliang Chen,

Wenzhao Zheng,

Yueqi Duan,

Jie Zhou,

Jiwen Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yuanhui and Chen, Weiliang and Zheng, Wenzhao and Duan, Yueqi and Zhou, Jie and Lu, Jiwen},
  title     = {{SpectralAR}: Spectral Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15842--15852}
}
RogSplat: Robust Gaussian Splatting via Generative Priors: Hanyang Kong,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao},
  title     = {{RogSplat}: Robust {Gaussian} Splatting via Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25735--25745}
}
When Schrodinger Bridge Meets Real-World Image Dehazing with Unpaired Training: Yunwei Lan,

Zhigao Cui,

Xin Luo,

Chang Liu,

Nian Wang,

Menglin Zhang,

Yanzhao Su,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Yunwei and Cui, Zhigao and Luo, Xin and Liu, Chang and Wang, Nian and Zhang, Menglin and Su, Yanzhao and Liu, Dong},
  title     = {When {Schrodinger} Bridge Meets Real-World Image Dehazing with Unpaired Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8756--8765}
}
CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching: Minjoo Ki,

Daejung Kim,

Kisung Kim,

Seon Joo Kim,

Jinhan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ki_2025_ICCV,
  author    = {Ki, Minjoo and Kim, Daejung and Kim, Kisung and Kim, Seon Joo and Lee, Jinhan},
  title     = {{CARIM}: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22036--22045}
}
Punching Bag vs. Punching Person: Motion Transferability in Videos: Raiyaan Abdullah,

Jared Claypoole,

Michael Cogswell,

Ajay Divakaran,

Yogesh Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abdullah_2025_ICCV,
  author    = {Abdullah, Raiyaan and Claypoole, Jared and Cogswell, Michael and Divakaran, Ajay and Rawat, Yogesh},
  title     = {Punching Bag vs. Punching Person: Motion Transferability in Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11348--11358}
}
CODE-CL: Conceptor-Based Gradient Projection for Deep Continual Learning: Marco P. E. Apolinario,

Sakshi Choudhary,

Kaushik Roy; [pdf] [supp]
[bibtex]
@InProceedings{Apolinario_2025_ICCV,
  author    = {Apolinario, Marco P. E. and Choudhary, Sakshi and Roy, Kaushik},
  title     = {{CODE-CL}: Conceptor-Based Gradient Projection for Deep Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {775--784}
}
Boosting Generative Adversarial Transferability with Self-supervised Vision Transformer Features: Shangbo Wu,

Yu-an Tan,

Ruinan Ma,

Wencong Ma,

Dehua Zhu,

Yuanzhang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shangbo and Tan, Yu-an and Ma, Ruinan and Ma, Wencong and Zhu, Dehua and Li, Yuanzhang},
  title     = {Boosting Generative Adversarial Transferability with Self-supervised Vision Transformer Features},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {530--540}
}
Zero-Shot Compositional Video Learning with Coding Rate Reduction: Heeseok Jung,

Jun-Hyeon Bak,

Yujin Jeong,

Gyugeun Lee,

Jinwoo Ahn,

Eun-Sol Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Heeseok and Bak, Jun-Hyeon and Jeong, Yujin and Lee, Gyugeun and Ahn, Jinwoo and Kim, Eun-Sol},
  title     = {Zero-Shot Compositional Video Learning with Coding Rate Reduction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20508--20518}
}
AVTrustBench: Assessing and Enhancing Reliability and Robustness in Audio-Visual LLMs: Sanjoy Chowdhury,

Sayan Nag,

Subhrajyoti Dasgupta,

Yaoting Wang,

Mohamed Elhoseiny,

Ruohan Gao,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Nag, Sayan and Dasgupta, Subhrajyoti and Wang, Yaoting and Elhoseiny, Mohamed and Gao, Ruohan and Manocha, Dinesh},
  title     = {{AVTrustBench}: Assessing and Enhancing Reliability and Robustness in Audio-Visual {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1590--1601}
}
LOTA: Bit-Planes Guided AI-Generated Image Detection: Hongsong Wang,

Renxi Cheng,

Yang Zhang,

Chaolei Han,

Jie Gui; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongsong and Cheng, Renxi and Zhang, Yang and Han, Chaolei and Gui, Jie},
  title     = {{LOTA}: Bit-Planes Guided {AI}-Generated Image Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17246--17255}
}
Benchmarking Egocentric Visual-Inertial SLAM at City Scale: Anusha Krishnan,

Shaohui Liu,

Paul-Edouard Sarlin,

Oscar Gentilhomme,

David Caruso,

Maurizio Monge,

Richard Newcombe,

Jakob Engel,

Marc Pollefeys; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnan_2025_ICCV,
    author    = {Krishnan, Anusha and Liu, Shaohui and Sarlin, Paul-Edouard and Gentilhomme, Oscar and Caruso, David and Monge, Maurizio and Newcombe, Richard and Engel, Jakob and Pollefeys, Marc},
    title     = {Benchmarking Egocentric Visual-Inertial {SLAM} at City Scale},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25207--25217}
}
CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image: Wonseok Roh,

Hwanhee Jung,

Jong Wook Kim,

Seunggwan Lee,

Innfarn Yoo,

Andreas Lugmayr,

Seunggeun Chi,

Karthik Ramani,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roh_2025_ICCV,
    author    = {Roh, Wonseok and Jung, Hwanhee and Kim, Jong Wook and Lee, Seunggwan and Yoo, Innfarn and Lugmayr, Andreas and Chi, Seunggeun and Ramani, Karthik and Kim, Sangpil},
    title     = {{CATSplat}: Context-Aware Transformer with Spatial Guidance for Generalizable {3D} {Gaussian} Splatting from A Single-View Image},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28228--28238}
}
Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts: Viet Nguyen,

Anh Nguyen,

Trung Dao,

Khoi Nguyen,

Cuong Pham,

Toan Tran,

Anh Tran; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
    author    = {Nguyen, Viet and Nguyen, Anh and Dao, Trung and Nguyen, Khoi and Pham, Cuong and Tran, Toan and Tran, Anh},
    title     = {Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {18004--18013}
}
Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering: Qing Li,

Huifang Feng,

Xun Gong,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Qing and Feng, Huifang and Gong, Xun and Liu, Yu-Shen},
    title     = {Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28828--28838}
}
CoTracker3: Simpler and Better Point Tracking by Pseudo-Labelling Real Videos: Nikita Karaev,

Yuri Makarov,

Jianyuan Wang,

Natalia Neverova,

Andrea Vedaldi,

Christian Rupprecht; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karaev_2025_ICCV,
    author    = {Karaev, Nikita and Makarov, Yuri and Wang, Jianyuan and Neverova, Natalia and Vedaldi, Andrea and Rupprecht, Christian},
    title     = {{CoTracker3}: Simpler and Better Point Tracking by Pseudo-Labelling Real Videos},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {6013--6022}
}
IMG: Calibrating Diffusion Models via Implicit Multimodal Guidance: Jiayi Guo,

Chuanhao Yan,

Xingqian Xu,

Yulin Wang,

Kai Wang,

Gao Huang,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
    author    = {Guo, Jiayi and Yan, Chuanhao and Xu, Xingqian and Wang, Yulin and Wang, Kai and Huang, Gao and Shi, Humphrey},
    title     = {{IMG}: Calibrating Diffusion Models via Implicit Multimodal Guidance},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {16079--16089}
}
VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos: Jiashuo Yu,

Yue Wu,

Meng Chu,

Zhifei Ren,

Zizheng Huang,

Pei Chu,

Ruijie Zhang,

Yinan He,

Qirui Li,

Songze Li,

Zhenxiang Li,

Zhongying Tu,

Conghui He,

Yu Qiao,

Yali Wang,

Yi Wang,

Limin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
    author    = {Yu, Jiashuo and Wu, Yue and Chu, Meng and Ren, Zhifei and Huang, Zizheng and Chu, Pei and Zhang, Ruijie and He, Yinan and Li, Qirui and Li, Songze and Li, Zhenxiang and Tu, Zhongying and He, Conghui and Qiao, Yu and Wang, Yali and Wang, Yi and Wang, Limin},
    title     = {{VRBench}: A Benchmark for Multi-Step Reasoning in Long Narrative Videos},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21655--21666}
}
PUMPS: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks: Clinton Ansun Mo,

Kun Hu,

Chengjiang Long,

Dong Yuan,

Wan-Chi Siu,

Zhiyong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2025_ICCV,
    author    = {Mo, Clinton Ansun and Hu, Kun and Long, Chengjiang and Yuan, Dong and Siu, Wan-Chi and Wang, Zhiyong},
    title     = {{PUMPS}: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {14496--14506}
}
When and Where do Data Poisons Attack Textual Inversion?: Jeremy Styborski,

Mingzhi Lyu,

Jiayou Lu,

Nupur Kapur,

Adams Wai-Kin Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Styborski_2025_ICCV,
    author    = {Styborski, Jeremy and Lyu, Mingzhi and Lu, Jiayou and Kapur, Nupur and Kong, Adams Wai-Kin},
    title     = {When and Where do Data Poisons Attack Textual Inversion?},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {19439--19449}
}
RAGD: Regional-Aware Diffusion Model for Text-to-Image Generation: Zhennan Chen,

Yajie Li,

Haofan Wang,

Zhibo Chen,

Zhengkai Jiang,

Jun Li,

Qian Wang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
    author    = {Chen, Zhennan and Li, Yajie and Wang, Haofan and Chen, Zhibo and Jiang, Zhengkai and Li, Jun and Wang, Qian and Yang, Jian and Tai, Ying},
    title     = {{RAGD}: Regional-Aware Diffusion Model for Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {19331--19341}
}
OpenM3D: Open Vocabulary Multi-view Indoor 3D Object Detection without Human Annotations: Peng-Hao Hsu,

Ke Zhang,

Fu-En Wang,

Tao Tu,

Ming-Feng Li,

Yu-Lun Liu,

Albert Y. C. Chen,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2025_ICCV,
    author    = {Hsu, Peng-Hao and Zhang, Ke and Wang, Fu-En and Tu, Tao and Li, Ming-Feng and Liu, Yu-Lun and Chen, Albert Y. C. and Sun, Min and Kuo, Cheng-Hao},
    title     = {{OpenM3D}: Open Vocabulary Multi-view Indoor {3D} Object Detection without Human Annotations},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {8688--8698}
}
From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition: Ling Lo,

Kelvin C.K. Chan,

Wen-Huang Cheng,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lo_2025_ICCV,
    author    = {Lo, Ling and Chan, Kelvin C. K. and Cheng, Wen-Huang and Yang, Ming-Hsuan},
    title     = {From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {18651--18660}
}
Benchmarking Multimodal CoT Reward Model Stepwise by Visual Program: Minghe Gao,

Xuqi Liu,

Zhongqi Yue,

Yang Wu,

Shuang Chen,

Juncheng Li,

Siliang Tang,

Fei Wu,

Tat-Seng Chua,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Minghe and Liu, Xuqi and Yue, Zhongqi and Wu, Yang and Chen, Shuang and Li, Juncheng and Tang, Siliang and Wu, Fei and Chua, Tat-Seng and Zhuang, Yueting},
    title     = {Benchmarking Multimodal {CoT} Reward Model Stepwise by Visual Program},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {1718--1728}
}
Web Artifact Attacks Disrupt Vision Language Models: Maan Qraitem,

Piotr Teterwak,

Kate Saenko,

Bryan A. Plummer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qraitem_2025_ICCV,
    author    = {Qraitem, Maan and Teterwak, Piotr and Saenko, Kate and Plummer, Bryan A.},
    title     = {Web Artifact Attacks Disrupt Vision Language Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {1048--1057}
}
GlassWizard: Harvesting Diffusion Priors for Glass Surface Detection: Wenxue Li,

Tian Ye,

Xinyu Xiong,

Jinbin Bai,

Feilong Tang,

Wenxuan Song,

Zhaohu Xing,

Lie Ju,

Guanbin Li,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Wenxue and Ye, Tian and Xiong, Xinyu and Bai, Jinbin and Tang, Feilong and Song, Wenxuan and Xing, Zhaohu and Ju, Lie and Li, Guanbin and Zhu, Lei},
    title     = {{GlassWizard}: Harvesting Diffusion Priors for Glass Surface Detection},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {17848--17858}
}
Quanta Neural Networks: From Photons to Perception: Varun Sundar,

Tianyi Zhang,

Sacha Jungerman,

Mohit Gupta; [pdf] [supp]
[bibtex]
@InProceedings{Sundar_2025_ICCV,
    author    = {Sundar, Varun and Zhang, Tianyi and Jungerman, Sacha and Gupta, Mohit},
    title     = {Quanta Neural Networks: From Photons to Perception},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {5091--5101}
}
Cross-Subject Mind Decoding from Inaccurate Representations: Yangyang Xu,

Bangzhen Liu,

Wenqi Shao,

Yong Du,

Shengfeng He,

Tingting Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
    author    = {Xu, Yangyang and Liu, Bangzhen and Shao, Wenqi and Du, Yong and He, Shengfeng and Zhu, Tingting},
    title     = {Cross-Subject Mind Decoding from Inaccurate Representations},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {15066--15075}
}
How Can Objects Help Video-Language Understanding?: Zitian Tang,

Shijie Wang,

Junho Cho,

Jaewook Yoo,

Chen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
    author    = {Tang, Zitian and Wang, Shijie and Cho, Junho and Yoo, Jaewook and Sun, Chen},
    title     = {How Can Objects Help Video-Language Understanding?},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21994--22003}
}
ImageGen-CoT: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning: Jiaqi Liao,

Zhengyuan Yang,

Linjie Li,

Dianqi Li,

Kevin Lin,

Yu Cheng,

Lijuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
    author    = {Liao, Jiaqi and Yang, Zhengyuan and Li, Linjie and Li, Dianqi and Lin, Kevin and Cheng, Yu and Wang, Lijuan},
    title     = {{ImageGen-CoT}: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {17214--17223}
}
Auto-Regressive Transformation for Image Alignment: Kanggeon Lee,

Soochahn Lee,

Kyoung Mu Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
    author    = {Lee, Kanggeon and Lee, Soochahn and Lee, Kyoung Mu},
    title     = {Auto-Regressive Transformation for Image Alignment},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {13569--13579}
}
Beyond RGB: Adaptive Parallel Processing for RAW Object Detection: Shani Gamrian,

Hila Barel,

Feiran Li,

Masakazu Yoshimura,

Daisuke Iso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gamrian_2025_ICCV,
    author    = {Gamrian, Shani and Barel, Hila and Li, Feiran and Yoshimura, Masakazu and Iso, Daisuke},
    title     = {Beyond {RGB}: Adaptive Parallel Processing for {RAW} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {5547--5557}
}
Stable Virtual Camera: Generative View Synthesis with Diffusion Models: Jensen Zhou,

Hang Gao,

Vikram Voleti,

Aaryaman Vasishta,

Chun-Han Yao,

Mark Boss,

Philip Torr,

Christian Rupprecht,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Jensen and Gao, Hang and Voleti, Vikram and Vasishta, Aaryaman and Yao, Chun-Han and Boss, Mark and Torr, Philip and Rupprecht, Christian and Jampani, Varun},
    title     = {Stable Virtual Camera: Generative View Synthesis with Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {12405--12414}
}
Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation: Maximilian Ulmer,

Wout Boerdijk,

Rudolph Triebel,

Maximilian Durner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ulmer_2025_ICCV,
    author    = {Ulmer, Maximilian and Boerdijk, Wout and Triebel, Rudolph and Durner, Maximilian},
    title     = {Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24360--24369}
}
Learning Counterfactually Decoupled Attention for Open-World Model Attribution: Yu Zheng,

Boyang Gong,

Fanye Kong,

Yueqi Duan,

Bingyao Yu,

Wenzhao Zheng,

Lei Chen,

Jiwen Lu,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
    author    = {Zheng, Yu and Gong, Boyang and Kong, Fanye and Duan, Yueqi and Yu, Bingyao and Zheng, Wenzhao and Chen, Lei and Lu, Jiwen and Zhou, Jie},
    title     = {Learning Counterfactually Decoupled Attention for Open-World Model Attribution},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {122--132}
}
DepR: Depth Guided Single-view Scene Reconstruction with Instance-level Diffusion: Qingcheng Zhao,

Xiang Zhang,

Haiyang Xu,

Zeyuan Chen,

Jianwen Xie,

Yuan Gao,

Zhuowen Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
    author    = {Zhao, Qingcheng and Zhang, Xiang and Xu, Haiyang and Chen, Zeyuan and Xie, Jianwen and Gao, Yuan and Tu, Zhuowen},
    title     = {{DepR}: Depth Guided Single-view Scene Reconstruction with Instance-level Diffusion},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {5722--5733}
}
Edicho: Consistent Image Editing in the Wild: Qingyan Bai,

Hao Ouyang,

Yinghao Xu,

Qiuyu Wang,

Ceyuan Yang,

Ka Leong Cheng,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
    author    = {Bai, Qingyan and Ouyang, Hao and Xu, Yinghao and Wang, Qiuyu and Yang, Ceyuan and Cheng, Ka Leong and Shen, Yujun and Chen, Qifeng},
    title     = {Edicho: Consistent Image Editing in the Wild},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {15277--15287}
}
Stable Diffusion Models are Secretly Good at Visual In-Context Learning: Trevine Oorloff,

Vishwanath Sindagi,

Wele Gedara Chaminda Bandara,

Ali Shafahi,

Amin Ghiasi,

Charan Prakash,

Reza Ardekani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oorloff_2025_ICCV,
    author    = {Oorloff, Trevine and Sindagi, Vishwanath and Bandara, Wele Gedara Chaminda and Shafahi, Ali and Ghiasi, Amin and Prakash, Charan and Ardekani, Reza},
    title     = {{Stable Diffusion} Models are Secretly Good at Visual In-Context Learning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23604--23613}
}
T2Bs: Text-to-Character Blendshapes via Video Generation: Jiahao Luo,

Chaoyang Wang,

Michael Vasilkovsky,

Vladislav Shakhrai,

Di Liu,

Peiye Zhuang,

Sergey Tulyakov,

Peter Wonka,

Hsin-Ying Lee,

James Davis,

Jian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
    author    = {Luo, Jiahao and Wang, Chaoyang and Vasilkovsky, Michael and Shakhrai, Vladislav and Liu, Di and Zhuang, Peiye and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying and Davis, James and Wang, Jian},
    title     = {{T2Bs}: Text-to-Character Blendshapes via Video Generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {13625--13637}
}
QuEST: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning: Haoxuan Wang,

Yuzhang Shang,

Zhihang Yuan,

Junyi Wu,

Junchi Yan,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Haoxuan and Shang, Yuzhang and Yuan, Zhihang and Wu, Junyi and Yan, Junchi and Yan, Yan},
    title     = {{QuEST}: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {15542--15551}
}
Task-Specific Zero-shot Quantization-Aware Training for Object Detection: Changhao Li,

Xinrui Chen,

Ji Wang,

Kang Zhao,

Jianfei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Changhao and Chen, Xinrui and Wang, Ji and Zhao, Kang and Chen, Jianfei},
    title     = {Task-Specific Zero-shot Quantization-Aware Training for Object Detection},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {22868--22878}
}
Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation: Junyu Xie,

Tengda Han,

Max Bain,

Arsha Nagrani,

Eshika Khandelwal,

Gül Varol,

Weidi Xie,

Andrew Zisserman; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
    author    = {Xie, Junyu and Han, Tengda and Bain, Max and Nagrani, Arsha and Khandelwal, Eshika and Varol, G{\"u}l and Xie, Weidi and Zisserman, Andrew},
    title     = {Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {16503--16513}
}
Snakes and Ladders: Two Steps Up for VideoMamba: Hui Lu,

Albert A. Salah,

Ronald Poppe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
    author    = {Lu, Hui and Salah, Albert A. and Poppe, Ronald},
    title     = {Snakes and Ladders: Two Steps Up for {VideoMamba}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24234--24244}
}
Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching: Zhaoyang Li,

Yuan Wang,

Guoxin Xiong,

Wangkai Li,

Yuwen Pan,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Zhaoyang and Wang, Yuan and Xiong, Guoxin and Li, Wangkai and Pan, Yuwen and Zhang, Tianzhu},
    title     = {Generalized Few-Shot Point Cloud Segmentation via {LLM}-Assisted Hyper-Relation Matching},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23063--23073}
}
Extending Foundational Monocular Depth Estimators to Fisheye Cameras with Calibration Tokens: Suchisrit Gangopadhyay,

Jung-Hee Kim,

Xien Chen,

Patrick Rim,

Hyoungseob Park,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gangopadhyay_2025_ICCV,
    author    = {Gangopadhyay, Suchisrit and Kim, Jung-Hee and Chen, Xien and Rim, Patrick and Park, Hyoungseob and Wong, Alex},
    title     = {Extending Foundational Monocular Depth Estimators to Fisheye Cameras with Calibration Tokens},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {5198--5209}
}
IGD: Instructional Graphic Design with Multimodal Layer Generation: Yadong Qu,

Shancheng Fang,

Yuxin Wang,

Xiaorui Wang,

Zhineng Chen,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
    author    = {Qu, Yadong and Fang, Shancheng and Wang, Yuxin and Wang, Xiaorui and Chen, Zhineng and Xie, Hongtao and Zhang, Yongdong},
    title     = {{IGD}: Instructional Graphic Design with Multimodal Layer Generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {18218--18228}
}
AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes: Tianyi Xu,

Fan Zhang,

Boxin Shi,

Tianfan Xue,

Yujin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
    author    = {Xu, Tianyi and Zhang, Fan and Shi, Boxin and Xue, Tianfan and Wang, Yujin},
    title     = {{AdaptiveAE}: An Adaptive Exposure Strategy for {HDR} Capturing in Dynamic Scenes},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25176--25185}
}
UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents: Harsh Agrawal,

Eldon Schoop,

Xinlei Pan,

Anuj Mahajan,

Ari Seff,

Di Feng,

Ruijia Cheng,

Andres Romero Mier Y Teran,

Esteban Gomez,

Abhishek Sundararajan,

Forrest Huang,

Amanda Swearngin,

Mohana Prasad Sathya Moorthy,

Jeff Nichols,

Alexander Toshev; [pdf] [supp]
[bibtex]
@InProceedings{Agrawal_2025_ICCV,
    author    = {Agrawal, Harsh and Schoop, Eldon and Pan, Xinlei and Mahajan, Anuj and Seff, Ari and Feng, Di and Cheng, Ruijia and Teran, Andres Romero Mier Y and Gomez, Esteban and Sundararajan, Abhishek and Huang, Forrest and Swearngin, Amanda and Moorthy, Mohana Prasad Sathya and Nichols, Jeff and Toshev, Alexander},
    title     = {{UINavBench}: A Framework for Comprehensive Evaluation of Interactive Digital Agents},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23353--23363}
}
FedWSQ: Efficient Federated Learning with Weight Standardization and Distribution-Aware Non-Uniform Quantization: Seung-Wook Kim,

Seongyeol Kim,

Jiah Kim,

Seowon Ji,

Se-Ho Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
    author    = {Kim, Seung-Wook and Kim, Seongyeol and Kim, Jiah and Ji, Seowon and Lee, Se-Ho},
    title     = {{FedWSQ}: Efficient Federated Learning with Weight Standardization and Distribution-Aware Non-Uniform Quantization},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {4616--4625}
}
Knowledge Transfer from Interaction Learning: Yilin Gao,

Kangyi Chen,

Zhongxing Peng,

Hengjie Lu,

Shugong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Yilin and Chen, Kangyi and Peng, Zhongxing and Lu, Hengjie and Xu, Shugong},
    title     = {Knowledge Transfer from Interaction Learning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {3585--3595}
}
Instruction-based Image Editing with Planning, Reasoning, and Generation: Liya Ji,

Chenyang Qi,

Qifeng Chen; [pdf]
[bibtex]
@InProceedings{Ji_2025_ICCV,
    author    = {Ji, Liya and Qi, Chenyang and Chen, Qifeng},
    title     = {Instruction-based Image Editing with Planning, Reasoning, and Generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {17506--17515}
}
Enhancing Mamba Decoder with Bidirectional Interaction in Multi-Task Dense Prediction: Mang Cao,

Sanping Zhou,

Yizhe Li,

Ye Deng,

Wenli Huang,

Le Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
    author    = {Cao, Mang and Zhou, Sanping and Li, Yizhe and Deng, Ye and Huang, Wenli and Wang, Le},
    title     = {Enhancing {Mamba} Decoder with Bidirectional Interaction in Multi-Task Dense Prediction},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {18815--18824}
}
CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction: Yuanyuan Gao,

Hao Li,

Jiaqi Chen,

Zhengyu Zou,

Zhihang Zhong,

Dingwen Zhang,

Xiao Sun,

Junwei Han; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Yuanyuan and Li, Hao and Chen, Jiaqi and Zou, Zhengyu and Zhong, Zhihang and Zhang, Dingwen and Sun, Xiao and Han, Junwei},
    title     = {{CityGS-X}: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {27187--27196}
}
Dataset Distillation via Vision-Language Category Prototype: Yawen Zou,

Guang Li,

Duo Su,

Zi Wang,

Jun Yu,

Chao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
    author    = {Zou, Yawen and Li, Guang and Su, Duo and Wang, Zi and Yu, Jun and Zhang, Chao},
    title     = {Dataset Distillation via Vision-Language Category Prototype},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {2941--2950}
}
Token Activation Map to Visually Explain Multimodal LLMs: Yi Li,

Hualiang Wang,

Xinpeng Ding,

Haonan Wang,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yi and Wang, Hualiang and Ding, Xinpeng and Wang, Haonan and Li, Xiaomeng},
    title     = {Token Activation Map to Visually Explain Multimodal {LLMs}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {48--58}
}
OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving: Mingqian Ji,

Shanshan Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
    author    = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian},
    title     = {{OcRFDet}: Object-Centric Radiance Fields for Multi-View {3D} Object Detection in Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24933--24942}
}
A Unified Framework to BRIDGE Complete and Incomplete Deep Multi-View Clustering under Non-IID Missing Patterns: Xiaorui Jiang,

Buyun He,

Peng Yuan Zhou,

Xinyue Chen,

Jingcai Guo,

Jie Xu,

Yong Liao; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
    author    = {Jiang, Xiaorui and He, Buyun and Zhou, Peng Yuan and Chen, Xinyue and Guo, Jingcai and Xu, Jie and Liao, Yong},
    title     = {A Unified Framework to {BRIDGE} Complete and Incomplete Deep Multi-View Clustering under Non-{IID} Missing Patterns},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {594--603}
}
FairHuman: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models: Yuxuan Wang,

Tianwei Cao,

Huayu Zhang,

Zhongjiang He,

Kongming Liang,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Yuxuan and Cao, Tianwei and Zhang, Huayu and He, Zhongjiang and Liang, Kongming and Ma, Zhanyu},
    title     = {{FairHuman}: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {17046--17055}
}
CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy: Zhibo Yang,

Jun Tang,

Zhaohai Li,

Pengfei Wang,

Jianqiang Wan,

Humen Zhong,

Xuejing Liu,

Mingkun Yang,

Peng Wang,

Shuai Bai,

Lianwen Jin,

Junyang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
    author    = {Yang, Zhibo and Tang, Jun and Li, Zhaohai and Wang, Pengfei and Wan, Jianqiang and Zhong, Humen and Liu, Xuejing and Yang, Mingkun and Wang, Peng and Bai, Shuai and Jin, Lianwen and Lin, Junyang},
    title     = {{CC-OCR}: A Comprehensive and Challenging {OCR} Benchmark for Evaluating Large Multimodal Models in Literacy},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21744--21754}
}
PEFTDiff: Diffusion-Guided Transferability Estimation for Parameter-Efficient Fine-Tuning: Prafful Kumar Khoba,

Zijian Wang,

Chetan Arora,

Mahsa Baktashmotlagh; [pdf] [supp]
[bibtex]
@InProceedings{Khoba_2025_ICCV,
    author    = {Khoba, Prafful Kumar and Wang, Zijian and Arora, Chetan and Baktashmotlagh, Mahsa},
    title     = {{PEFTDiff}: Diffusion-Guided Transferability Estimation for Parameter-Efficient Fine-Tuning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {1454--1463}
}
CODA: Repurposing Continuous VAEs for Discrete Tokenization: Zeyu Liu,

Zanlin Ni,

Yeguo Hua,

Xin Deng,

Xiao Ma,

Cheng Zhong,

Gao Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Zeyu and Ni, Zanlin and Hua, Yeguo and Deng, Xin and Ma, Xiao and Zhong, Cheng and Huang, Gao},
    title     = {{CODA}: Repurposing Continuous {VAEs} for Discrete Tokenization},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {18906--18916}
}
GeoAvatar: Adaptive Geometrical Gaussian Splatting for 3D Head Avatar: SeungJun Moon,

Hah Min Lew,

Seungeun Lee,

Ji-Su Kang,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
    author    = {Moon, SeungJun and Lew, Hah Min and Lee, Seungeun and Kang, Ji-Su and Park, Gyeong-Moon},
    title     = {{GeoAvatar}: Adaptive Geometrical {Gaussian} Splatting for {3D} Head Avatar},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {12811--12821}
}
Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing: Seungjin Jung,

Kanghee Lee,

Yonghyun Jeong,

Haeun Noh,

Jungmin Lee,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
    author    = {Jung, Seungjin and Lee, Kanghee and Jeong, Yonghyun and Noh, Haeun and Lee, Jungmin and Choi, Jongwon},
    title     = {Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {13372--13381}
}
RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion: Geonho Bang,

Minjae Seong,

Jisong Kim,

Geunju Baek,

Daye Oh,

Junhyung Kim,

Junho Koh,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2025_ICCV,
    author    = {Bang, Geonho and Seong, Minjae and Kim, Jisong and Baek, Geunju and Oh, Daye and Kim, Junhyung and Koh, Junho and Choi, Jun Won},
    title     = {{RCTDistill}: Cross-Modal Knowledge Distillation Framework for Radar-Camera {3D} Object Detection with Temporal Fusion},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25315--25324}
}
Towards Annotation-Free Evaluation: KPAScore for Human Keypoint Detection: Xiaoxiao Wang,

Chunxiao Li,

Peng Sun,

Boming Miao,

Yunjian Zhang,

Yao Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Xiaoxiao and Li, Chunxiao and Sun, Peng and Miao, Boming and Zhang, Yunjian and Zhu, Yao},
    title     = {Towards Annotation-Free Evaluation: {KPAScore} for Human Keypoint Detection},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {8441--8450}
}
TITAN: Query-Token based Domain Adaptive Adversarial Learning: Tajamul Ashraf,

Janibul Bashir; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashraf_2025_ICCV,
  author    = {Ashraf, Tajamul and Bashir, Janibul},
  title     = {{TITAN}: Query-Token based Domain Adaptive Adversarial Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {250--262},
}
AnyCalib: On-Manifold Learning for Model-Agnostic Single-View Camera Calibration: Javier Tirado-Garín,

Javier Civera; [pdf] [supp]
[bibtex]
@InProceedings{Tirado-Garin_2025_ICCV,
  author    = {Tirado-Gar{\'\i}n, Javier and Civera, Javier},
  title     = {{AnyCalib}: On-Manifold Learning for Model-Agnostic Single-View Camera Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8044--8055},
}
SEGA: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior: Haoran Wang,

Bo Zhao,

Jinghui Wang,

Hanzhang Wang,

Huan Yang,

Wei Ji,

Hao Liu,

Xinyan Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoran and Zhao, Bo and Wang, Jinghui and Wang, Hanzhang and Yang, Huan and Ji, Wei and Liu, Hao and Xiao, Xinyan},
  title     = {{SEGA}: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19321--19330},
}
GEOPARD: Geometric Pretraining for Articulation Prediction in 3D Shapes: Pradyumn Goyal,

Dmitry Petrov,

Sheldon Andrews,

Yizhak Ben-Shabat,

Hsueh-Ti Derek Liu,

Evangelos Kalogerakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goyal_2025_ICCV,
  author    = {Goyal, Pradyumn and Petrov, Dmitry and Andrews, Sheldon and Ben-Shabat, Yizhak and Liu, Hsueh-Ti Derek and Kalogerakis, Evangelos},
  title     = {{GEOPARD}: Geometric Pretraining for Articulation Prediction in {3D} Shapes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9332--9341},
}
ViSpeak: Visual Instruction Feedback in Streaming Videos: Shenghao Fu,

Qize Yang,

Yuan-Ming Li,

Yi-Xing Peng,

Kun-Yu Lin,

Xihan Wei,

Jian-Fang Hu,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Shenghao and Yang, Qize and Li, Yuan-Ming and Peng, Yi-Xing and Lin, Kun-Yu and Wei, Xihan and Hu, Jian-Fang and Xie, Xiaohua and Zheng, Wei-Shi},
  title     = {{ViSpeak}: Visual Instruction Feedback in Streaming Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21778--21788},
}
Feature Coding in the Era of Large Models: Dataset, Test Conditions, and Benchmark: Changsheng Gao,

Yifan Ma,

Qiaoxi Chen,

Yenan Xu,

Dong Liu,

Weisi Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Changsheng and Ma, Yifan and Chen, Qiaoxi and Xu, Yenan and Liu, Dong and Lin, Weisi},
  title     = {Feature Coding in the Era of Large Models: Dataset, Test Conditions, and Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1068--1077},
}
VideoAds for Fast-Paced Video Understanding: Zheyuan Zhang,

Wanying Dou,

Linkai Peng,

Hongyi Pan,

Ulas Bagci,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zheyuan and Dou, Wanying and Peng, Linkai and Pan, Hongyi and Bagci, Ulas and Gong, Boqing},
  title     = {{VideoAds} for Fast-Paced Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21812--21821},
}
MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions: Qingyuan Zhou,

Yuehu Gong,

Weidong Yang,

Jiaze Li,

Yeqi Luo,

Baixin Xu,

Shuhao Li,

Ben Fei,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Qingyuan and Gong, Yuehu and Yang, Weidong and Li, Jiaze and Luo, Yeqi and Xu, Baixin and Li, Shuhao and Fei, Ben and He, Ying},
  title     = {{MGSR}: {2D/3D} Mutual-boosted {Gaussian} Splatting for High-fidelity Surface Reconstruction under Various Light Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27295--27304},
}
Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction: Giuseppe Cartella,

Vittorio Cuculo,

Alessandro D'Amelio,

Marcella Cornia,

Giuseppe Boccignone,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cartella_2025_ICCV,
  author    = {Cartella, Giuseppe and Cuculo, Vittorio and D'Amelio, Alessandro and Cornia, Marcella and Boccignone, Giuseppe and Cucchiara, Rita},
  title     = {Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16206--16216},
}
Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions: Yuanhong Zheng,

Ruixuan Yu,

Jian Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Yuanhong and Yu, Ruixuan and Sun, Jian},
  title     = {Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10844--10853},
}
I2V3D: Controllable Image-to-video Generation with 3D Guidance: Zhiyuan Zhang,

Dongdong Chen,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhiyuan and Chen, Dongdong and Liao, Jing},
  title     = {{I2V3D}: Controllable Image-to-video Generation with {3D} Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13360--13371},
}
TAG-WM: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity: Yuzhuo Chen,

Zehua Ma,

Han Fang,

Weiming Zhang,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuzhuo and Ma, Zehua and Fang, Han and Zhang, Weiming and Yu, Nenghai},
  title     = {{TAG-WM}: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16723--16732},
}
MIEB: Massive Image Embedding Benchmark: Chenghao Xiao,

Isaac Chung,

Imene Kerboua,

Jamie Stirling,

Xin Zhang,

Márton Kardos,

Roman Solomatin,

Noura Al Moubayed,

Kenneth Enevoldsen,

Niklas Muennighoff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Chenghao and Chung, Isaac and Kerboua, Imene and Stirling, Jamie and Zhang, Xin and Kardos, M{\'a}rton and Solomatin, Roman and Al Moubayed, Noura and Enevoldsen, Kenneth and Muennighoff, Niklas},
  title     = {{MIEB}: Massive Image Embedding Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22187--22198},
}
IMoRe: Implicit Program-Guided Reasoning for Human Motion Q&A: Chen Li,

Chinthani Sugandhika,

Yeo Keat Ee,

Eric Peh,

Hao Zhang,

Hong Yang,

Deepu Rajan,

Basura Fernando; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Chen and Sugandhika, Chinthani and Ee, Yeo Keat and Peh, Eric and Zhang, Hao and Yang, Hong and Rajan, Deepu and Fernando, Basura},
  title     = {{IMoRe}: Implicit Program-Guided Reasoning for Human Motion {Q\&A}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12987--12996},
}
SViM3D: Stable Video Material Diffusion for Single Image 3D Generation: Andreas Engelhardt,

Mark Boss,

Vikram Voleti,

Chun-Han Yao,

Hendrik P. A. Lensch,

Varun Jampani; [pdf] [supp]
[bibtex]
@InProceedings{Engelhardt_2025_ICCV,
  author    = {Engelhardt, Andreas and Boss, Mark and Voleti, Vikram and Yao, Chun-Han and Lensch, Hendrik P. A. and Jampani, Varun},
  title     = {{SViM3D}: Stable Video Material Diffusion for Single Image {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28428--28439},
}
VAGUE: Visual Contexts Clarify Ambiguous Expressions: Heejeong Nam,

Jinwoo Ahn,

Keummin Ka,

Jiwan Chung,

Youngjae Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_ICCV,
  author    = {Nam, Heejeong and Ahn, Jinwoo and Ka, Keummin and Chung, Jiwan and Yu, Youngjae},
  title     = {{VAGUE}: Visual Contexts Clarify Ambiguous Expressions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1537--1547},
}
Enhancing Numerical Prediction of MLLMs with Soft Labeling: Pei Wang,

Zhaowei Cai,

Hao Yang,

Davide Modolo,

Ashwin Swaminathan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Modolo, Davide and Swaminathan, Ashwin},
  title     = {Enhancing Numerical Prediction of {MLLMs} with Soft Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3424--3434},
}
Fine-Grained 3D Gaussian Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration: Yuan Sun,

Xuan Wang,

Cong Wang,

WeiLi Zhang,

Yanbo Fan,

Yu Guo,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Yuan and Wang, Xuan and Wang, Cong and Zhang, WeiLi and Fan, Yanbo and Guo, Yu and Wang, Fei},
  title     = {Fine-Grained {3D} {Gaussian} Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14293--14304},
}
DynamicFace: High-Quality and Consistent Face Swapping for Image and Video using Composable 3D Facial Priors: Runqi Wang,

Yang Chen,

Sijie Xu,

Tianyao He,

Wei Zhu,

Dejia Song,

Nemo Chen,

Xu Tang,

Yao Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Runqi and Chen, Yang and Xu, Sijie and He, Tianyao and Zhu, Wei and Song, Dejia and Chen, Nemo and Tang, Xu and Hu, Yao},
  title     = {{DynamicFace}: High-Quality and Consistent Face Swapping for Image and Video using Composable {3D} Facial Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13438--13447},
}
CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images: Jungho Lee,

Donghyeong Kim,

Dogyoon Lee,

Suhwan Cho,

Minhyeok Lee,

Wonjoon Lee,

Taeoh Kim,

Dongyoon Wee,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungho and Kim, Donghyeong and Lee, Dogyoon and Cho, Suhwan and Lee, Minhyeok and Lee, Wonjoon and Kim, Taeoh and Wee, Dongyoon and Lee, Sangyoun},
  title     = {{CoMoGaussian}: Continuous Motion-Aware {Gaussian} Splatting from Motion-Blurred Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26415--26424},
}
Open-Unfairness Adversarial Mitigation for Generalized Deepfake Detection: Zhaoyang Li,

Zhu Teng,

Baopeng Zhang,

Jianping Fan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhaoyang and Teng, Zhu and Zhang, Baopeng and Fan, Jianping},
  title     = {Open-Unfairness Adversarial Mitigation for Generalized Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {698--707},
}
Recognizing Actions from Robotic View for Natural Human-Robot Interaction: Ziyi Wang,

Peiming Li,

Hong Liu,

Zhichao Deng,

Can Wang,

Jun Liu,

Junsong Yuan,

Mengyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyi and Li, Peiming and Liu, Hong and Deng, Zhichao and Wang, Can and Liu, Jun and Yuan, Junsong and Liu, Mengyuan},
  title     = {Recognizing Actions from Robotic View for Natural Human-Robot Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14218--14227},
}
WIPES: Wavelet-based Visual Primitives: Wenhao Zhang,

Hao Zhu,

Delong Wu,

Di Kang,

Linchao Bao,

Xun Cao,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenhao and Zhu, Hao and Wu, Delong and Kang, Di and Bao, Linchao and Cao, Xun and Ma, Zhan},
  title     = {{WIPES}: Wavelet-based Visual Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27338--27347},
}
Frequency Domain-Based Diffusion Model for Unpaired Image Dehazing: Chengxu Liu,

Lu Qi,

Jinshan Pan,

Xueming Qian,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chengxu and Qi, Lu and Pan, Jinshan and Qian, Xueming and Yang, Ming-Hsuan},
  title     = {Frequency Domain-Based Diffusion Model for Unpaired Image Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7538--7547},
}
Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint: Wentian Cai,

Weizhao Weng,

Zihao Huang,

Yandan Chen,

Siquan Huang,

Ping Gao,

Victor C. M. Leung,

Ying Gao; [pdf]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Wentian and Weng, Weizhao and Huang, Zihao and Chen, Yandan and Huang, Siquan and Gao, Ping and Leung, Victor C. M. and Gao, Ying},
  title     = {Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23332--23341},
}
SAGI: Semantically Aligned and Uncertainty Guided AI Image Inpainting: Paschalis Giakoumoglou,

Dimitrios Karageorgiou,

Symeon Papadopoulos,

Panagiotis C. Petrantonakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Giakoumoglou_2025_ICCV,
  author    = {Giakoumoglou, Paschalis and Karageorgiou, Dimitrios and Papadopoulos, Symeon and Petrantonakis, Panagiotis C.},
  title     = {{SAGI}: Semantically Aligned and Uncertainty Guided {AI} Image Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16090--16101},
}
Adversarial Purification via Super-Resolution and Diffusion: Mincheol Park,

Cheonjun Park,

Seungseop Lim,

Mijin Koo,

Hyunwuk Lee,

Won Woo Ro,

Suhyun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Mincheol and Park, Cheonjun and Lim, Seungseop and Koo, Mijin and Lee, Hyunwuk and Ro, Won Woo and Kim, Suhyun},
  title     = {Adversarial Purification via Super-Resolution and Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4605--4615},
}
RAGDiffusion: Faithful Cloth Generation via External Knowledge Assimilation: Yuhan Li,

Xianfeng Tan,

Wenxiang Shang,

Yubo Wu,

Jian Wang,

Xuanhong Chen,

Yi Zhang,

Hangcheng Zhu,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuhan and Tan, Xianfeng and Shang, Wenxiang and Wu, Yubo and Wang, Jian and Chen, Xuanhong and Zhang, Yi and Zhu, Hangcheng and Ni, Bingbing},
  title     = {{RAGDiffusion}: Faithful Cloth Generation via External Knowledge Assimilation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17485--17495},
}
Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior: Juncheng Mu,

Chengwei Ren,

Weixiang Zhang,

Liang Pan,

Xiao-Ping Zhang,

Yue Gao; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2025_ICCV,
  author    = {Mu, Juncheng and Ren, Chengwei and Zhang, Weixiang and Pan, Liang and Zhang, Xiao-Ping and Gao, Yue},
  title     = {{Diff2I2P}: Differentiable Image-to-Point Cloud Registration with Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25777--25787},
}
Dynamic Typography: Bringing Text to Life via Video Diffusion Prior: Zichen Liu,

Yihao Meng,

Hao Ouyang,

Yue Yu,

Bolin Zhao,

Daniel Cohen-Or,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zichen and Meng, Yihao and Ouyang, Hao and Yu, Yue and Zhao, Bolin and Cohen-Or, Daniel and Qu, Huamin},
  title     = {Dynamic Typography: Bringing Text to Life via Video Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14787--14797},
}
LightSwitch: Multi-view Relighting with Material-guided Diffusion: Yehonathan Litman,

Fernando De la Torre,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Litman_2025_ICCV,
  author    = {Litman, Yehonathan and {De la Torre}, Fernando and Tulsiani, Shubham},
  title     = {{LightSwitch}: Multi-view Relighting with Material-guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27750--27759},
}
CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences: Miaowei Wang,

Changjian Li,

Amir Vaxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Miaowei and Li, Changjian and Vaxman, Amir},
  title     = {{CanFields}: Consolidating Diffeomorphic Flows for Non-Rigid {4D} Interpolation from Arbitrary-Length Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28587--28598},
}
Occlusion-robust Stylization for Drawing-based 3D Animation: Sunjae Yoon,

Gwanhyeong Koo,

Younghwan Lee,

Ji Woo Hong,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2025_ICCV,
  author    = {Yoon, Sunjae and Koo, Gwanhyeong and Lee, Younghwan and Hong, Ji Woo and Yoo, Chang D.},
  title     = {Occlusion-robust Stylization for Drawing-based {3D} Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12263--12273},
}
Towards Foundational Models for Single-Chip Radar: Tianshu Huang,

Akarsh Prabhakara,

Chuhan Chen,

Jay Karhade,

Deva Ramanan,

Matthew O'Toole,

Anthony Rowe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Tianshu and Prabhakara, Akarsh and Chen, Chuhan and Karhade, Jay and Ramanan, Deva and O'Toole, Matthew and Rowe, Anthony},
  title     = {Towards Foundational Models for Single-Chip Radar},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24655--24665},
}
Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification: Zhiqi Pang,

Chunyu Wang,

Lingling Zhao,

Junjie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pang_2025_ICCV,
  author    = {Pang, Zhiqi and Wang, Chunyu and Zhao, Lingling and Wang, Junjie},
  title     = {Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11100--11109},
}
Spatial Alignment and Temporal Matching Adapter for Video-Radar Remote Physiological Measurement: Qian Liang,

Ruixu Geng,

Jinbo Chen,

Haoyu Wang,

Yan Chen,

Yang Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Qian and Geng, Ruixu and Chen, Jinbo and Wang, Haoyu and Chen, Yan and Hu, Yang},
  title     = {Spatial Alignment and Temporal Matching Adapter for Video-Radar Remote Physiological Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8623--8633},
}
StableCodec: Taming One-Step Diffusion for Extreme Image Compression: Tianyu Zhang,

Xin Luo,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tianyu and Luo, Xin and Li, Li and Liu, Dong},
  title     = {{StableCodec}: Taming One-Step Diffusion for Extreme Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17379--17389},
}
Seal Your Backdoor with Variational Defense: Ivan Sabolić,

Matej Grcić,

Siniša Šegvić; [pdf] [supp]
[bibtex]
@InProceedings{Sabolic_2025_ICCV,
  author    = {Saboli{\'c}, Ivan and Grci{\'c}, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a},
  title     = {Seal Your Backdoor with Variational Defense},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {752--764},
}
From Abyssal Darkness to Blinding Glare: A Benchmark on Extreme Exposure Correction in Real World: Bo Wang,

Huiyuan Fu,

Zhiye Huang,

Siru Zhang,

Xin Wang,

Huadong Ma; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bo and Fu, Huiyuan and Huang, Zhiye and Zhang, Siru and Wang, Xin and Ma, Huadong},
  title     = {From Abyssal Darkness to Blinding Glare: A Benchmark on Extreme Exposure Correction in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7666--7675},
}
MVTrajecter: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost: Taiga Yamane,

Ryo Masumura,

Satoshi Suzuki,

Shota Orihashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamane_2025_ICCV,
  author    = {Yamane, Taiga and Masumura, Ryo and Suzuki, Satoshi and Orihashi, Shota},
  title     = {{MVTrajecter}: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13270--13280},
}
GeoDiffusion: A Training-Free Framework for Accurate 3D Geometric Conditioning in Image Generation: Phillip Mueller,

Talip Uenlue,

Sebastian Schmidt,

Marcel Kollovieh,

Jiajie Fan,

Stephan Günnemann,

Lars Mikelsons; [pdf] [supp]
[bibtex]
@InProceedings{Mueller_2025_ICCV,
  author    = {Mueller, Phillip and Uenlue, Talip and Schmidt, Sebastian and Kollovieh, Marcel and Fan, Jiajie and G{\"u}nnemann, Stephan and Mikelsons, Lars},
  title     = {{GeoDiffusion}: A Training-Free Framework for Accurate {3D} Geometric Conditioning in Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6374--6384},
}
VALLR: Visual ASR Language Model for Lip Reading: Marshall Thomas,

Edward Fish,

Richard Bowden; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thomas_2025_ICCV,
  author    = {Thomas, Marshall and Fish, Edward and Bowden, Richard},
  title     = {{VALLR}: Visual {ASR} Language Model for Lip Reading},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2846--2856},
}
Learning Efficient and Generalizable Human Representation with Human Gaussian Model: Yifan Liu,

Shengjun Zhang,

Chensheng Dai,

Yang Chen,

Hao Liu,

Chen Li,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yifan and Zhang, Shengjun and Dai, Chensheng and Chen, Yang and Liu, Hao and Li, Chen and Duan, Yueqi},
  title     = {Learning Efficient and Generalizable Human Representation with Human {Gaussian} Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11797--11806},
}
DexH2R: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover: Youzhuo Wang,

Jiayi Ye,

Chuyang Xiao,

Yiming Zhong,

Heng Tao,

Hang Yu,

Yumeng Liu,

Jingyi Yu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Youzhuo and Ye, Jiayi and Xiao, Chuyang and Zhong, Yiming and Tao, Heng and Yu, Hang and Liu, Yumeng and Yu, Jingyi and Ma, Yuexin},
  title     = {{DexH2R}: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12702--12712},
}
Mobile Video Diffusion: Haitam Ben Yahia,

Denis Korzhenkov,

Ioannis Lelekas,

Amir Ghodrati,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Yahia_2025_ICCV,
  author    = {Ben Yahia, Haitam and Korzhenkov, Denis and Lelekas, Ioannis and Ghodrati, Amir and Habibian, Amirhossein},
  title     = {Mobile Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19450--19460},
}
Importance-Based Token Merging for Efficient Image and Video Generation: Haoyu Wu,

Jingyi Xu,

Hieu Le,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Haoyu and Xu, Jingyi and Le, Hieu and Samaras, Dimitris},
  title     = {Importance-Based Token Merging for Efficient Image and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4983--4995},
}
Semi-ViM: Bidirectional State Space Model for Mitigating Label Imbalance in Semi-Supervised Learning: Hongyang He,

Hongyang Xie,

Haochen You,

Victor Sanchez; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Hongyang and Xie, Hongyang and You, Haochen and Sanchez, Victor},
  title     = {{Semi-ViM}: Bidirectional State Space Model for Mitigating Label Imbalance in Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {765--774},
}
Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution: Peng Du,

Hui Li,

Han Xu,

Paul Barom Jeon,

Dongwook Lee,

Daehyun Ji,

Ran Yang,

Feng Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Peng and Li, Hui and Xu, Han and Jeon, Paul Barom and Lee, Dongwook and Ji, Daehyun and Yang, Ran and Zhu, Feng},
  title     = {Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19700--19710},
}
Towards Human-like Virtual Beings: Simulating Human Behavior in 3D Scenes: Chen Liang,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Chen and Wang, Wenguan and Yang, Yi},
  title     = {Towards Human-like Virtual Beings: Simulating Human Behavior in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10753--10763},
}
HiGarment: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image: Junyi Guo,

Jingxuan Zhang,

Fangyu Wu,

Huanda Lu,

Qiufeng Wang,

Wenmian Yang,

Eng Gee Lim,

Dongming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Junyi and Zhang, Jingxuan and Wu, Fangyu and Lu, Huanda and Wang, Qiufeng and Yang, Wenmian and Lim, Eng Gee and Lu, Dongming},
  title     = {{HiGarment}: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18542--18551},
}
Robust Unfolding Network for HDR Imaging with Modulo Cameras: Zhile Chen,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhile and Ji, Hui},
  title     = {Robust Unfolding Network for {HDR} Imaging with Modulo Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25218--25228},
}
Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning: Zeyu Xi,

Haoying Sun,

Yaofei Wu,

Junchi Yan,

Haoran Zhang,

Lifang Wu,

Liang Wang,

Changwen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xi_2025_ICCV,
  author    = {Xi, Zeyu and Sun, Haoying and Wu, Yaofei and Yan, Junchi and Zhang, Haoran and Wu, Lifang and Wang, Liang and Chen, Changwen},
  title     = {Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24330--24339},
}
ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios: Jun Yin,

Pengyu Zeng,

Licheng Shen,

Miao Zhang,

Jing Zhong,

Yuxing Han,

Shuai Lu; [pdf]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Jun and Zeng, Pengyu and Shen, Licheng and Zhang, Miao and Zhong, Jing and Han, Yuxing and Lu, Shuai},
  title     = {{ArchiSet}: Benchmarking Editable and Consistent Single-View {3D} Reconstruction of Buildings with Specific Window-to-Wall Ratios},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26004--26014},
}
Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling: Chao Zhou,

Tianyi Wei,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Chao and Wei, Tianyi and Yu, Nenghai},
  title     = {Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15171--15181},
}
From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning: Sen Wang,

Shao Zeng,

Tianjun Gu,

Zhizhong Zhang,

Ruixin Zhang,

Shouhong Ding,

Jingyun Zhang,

Jun Wang,

Xin Tan,

Yuan Xie,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sen and Zeng, Shao and Gu, Tianjun and Zhang, Zhizhong and Zhang, Ruixin and Ding, Shouhong and Zhang, Jingyun and Wang, Jun and Tan, Xin and Xie, Yuan and Ma, Lizhuang},
  title     = {From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13804--13814},
}
Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models: Eunseo Koh,

Seunghoo Hong,

Tae-Young Kim,

Simon S. Woo,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koh_2025_ICCV, author = {Koh, Eunseo and Hong, Seunghoo and Kim, Tae-Young and Woo, Simon S. and Heo, Jae-Pil}, title = {Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15365-15374} }
ARMO: Autoregressive Rigging for Multi-Category Objects: Mingze Sun,

Shiwei Mao,

Keyi Chen,

Yurun Chen,

Shunlin Lu,

Jingbo Wang,

Junting Dong,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV, author = {Sun, Mingze and Mao, Shiwei and Chen, Keyi and Chen, Yurun and Lu, Shunlin and Wang, Jingbo and Dong, Junting and Huang, Ruqi}, title = {ARMO: Autoregressive Rigging for Multi-Category Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7721-7730} }
DIP: Unsupervised Dense In-Context Post-training of Visual Representations: Sophia Sirko-Galouchenko,

Spyros Gidaris,

Antonin Vobecky,

Andrei Bursuc,

Nicolas Thome; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sirko-Galouchenko_2025_ICCV, author = {Sirko-Galouchenko, Sophia and Gidaris, Spyros and Vobecky, Antonin and Bursuc, Andrei and Thome, Nicolas}, title = {DIP: Unsupervised Dense In-Context Post-training of Visual Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4264-4274} }
Think Twice: Test-Time Reasoning for Robust CLIP Zero-Shot Classification: Shenyu Lu,

Zhaoying Pan,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV, author = {Lu, Shenyu and Pan, Zhaoying and Wang, Xiaoqian}, title = {Think Twice: Test-Time Reasoning for Robust CLIP Zero-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2919-2929} }
Training-free Geometric Image Editing on Diffusion Models: Hanshen Zhu,

Zhen Zhu,

Kaile Zhang,

Yiming Gong,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV, author = {Zhu, Hanshen and Zhu, Zhen and Zhang, Kaile and Gong, Yiming and Liu, Yuliang and Bai, Xiang}, title = {Training-free Geometric Image Editing on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19130-19140} }
HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration: Xiyu Zhang,

Jiayi Ma,

Jianwei Guo,

Wei Hu,

Zhaoshuai Qi,

Fei Hui,

Jiaqi Yang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiyu and Ma, Jiayi and Guo, Jianwei and Hu, Wei and Qi, Zhaoshuai and Hui, Fei and Yang, Jiaqi and Zhang, Yanning}, title = {HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24750-24759} }
Dark-ISP: Enhancing RAW Image Processing for Low-Light Object Detection: Jiasheng Guo,

Xin Gao,

Yuxiang Yan,

Guanghao Li,

Jian Pu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV, author = {Guo, Jiasheng and Gao, Xin and Yan, Yuxiang and Li, Guanghao and Pu, Jian}, title = {Dark-ISP: Enhancing RAW Image Processing for Low-Light Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9583-9593} }
Harmonizing Visual Representations for Unified Multimodal Understanding and Generation: Size Wu,

Wenwei Zhang,

Lumin Xu,

Sheng Jin,

Zhonghua Wu,

Qingyi Tao,

Wentao Liu,

Wei Li,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Size and Zhang, Wenwei and Xu, Lumin and Jin, Sheng and Wu, Zhonghua and Tao, Qingyi and Liu, Wentao and Li, Wei and Loy, Chen Change}, title = {Harmonizing Visual Representations for Unified Multimodal Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17739-17750} }
Environment-Agnostic Pose: Generating Environment-independent Object Representations for 6D Pose Estimation: Shaobo Zhang,

Yuhang Huang,

Wanqing Zhao,

Wei Zhao,

Ziyu Guan,

Jinye Peng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shaobo and Huang, Yuhang and Zhao, Wanqing and Zhao, Wei and Guan, Ziyu and Peng, Jinye}, title = {Environment-Agnostic Pose: Generating Environment-independent Object Representations for 6D Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8678-8687} }
UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields: Fabian Perez,

Sara Rojas,

Carlos Hinojosa,

Hoover Rueda-Chacón,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Perez_2025_ICCV, author = {Perez, Fabian and Rojas, Sara and Hinojosa, Carlos and Rueda-Chac\'on, Hoover and Ghanem, Bernard}, title = {UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26284-26293} }
Progressive Homeostatic and Plastic Prompt Tuning for Audio-Visual Multi-Task Incremental Learning: Jiong Yin,

Liang Li,

Jiehua Zhang,

Yuhan Gao,

Chenggang Yan,

Xichun Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV, author = {Yin, Jiong and Li, Liang and Zhang, Jiehua and Gao, Yuhan and Yan, Chenggang and Sheng, Xichun}, title = {Progressive Homeostatic and Plastic Prompt Tuning for Audio-Visual Multi-Task Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2022-2033} }
Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation: Tim Elsner,

Paula Usinger,

Julius Nehring-Wirxel,

Gregor Kobsik,

Victor Czech,

Yanjiang He,

Isaak Lim,

Leif Kobbelt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elsner_2025_ICCV, author = {Elsner, Tim and Usinger, Paula and Nehring-Wirxel, Julius and Kobsik, Gregor and Czech, Victor and He, Yanjiang and Lim, Isaak and Kobbelt, Leif}, title = {Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21331-21341} }
All in One: Visual-Description-Guided Unified Point Cloud Segmentation: Zongyan Han,

Mohamed El Amine Boudjoghra,

Jiahua Dong,

Jinhong Wang,

Rao Muhammad Anwer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV, author = {Han, Zongyan and El Amine Boudjoghra, Mohamed and Dong, Jiahua and Wang, Jinhong and Anwer, Rao Muhammad}, title = {All in One: Visual-Description-Guided Unified Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24835-24845} }
Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts: Yanguang Sun,

Jiawei Lian,

Jian Yang,

Lei Luo; [pdf]
[bibtex]
@InProceedings{Sun_2025_ICCV, author = {Sun, Yanguang and Lian, Jiawei and Yang, Jian and Luo, Lei}, title = {Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22327-22337} }
Frequency-Dynamic Attention Modulation For Dense Prediction: Linwei Chen,

Lin Gu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Linwei and Gu, Lin and Fu, Ying}, title = {Frequency-Dynamic Attention Modulation For Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22620-22632} }
Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need: Yongchuan Cui,

Peng Liu,

Hui Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_ICCV, author = {Cui, Yongchuan and Liu, Peng and Zhang, Hui}, title = {Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11850-11860} }
Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification: Yuan Tian,

Shuo Wang,

Rongzhao Zhang,

Zijian Chen,

Yankai Jiang,

Chunyi Li,

Xiangyang Zhu,

Fang Yan,

Qiang Hu,

XiaoSong Wang,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV, author = {Tian, Yuan and Wang, Shuo and Zhang, Rongzhao and Chen, Zijian and Jiang, Yankai and Li, Chunyi and Zhu, Xiangyang and Yan, Fang and Hu, Qiang and Wang, XiaoSong and Zhai, Guangtao}, title = {Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20613-20625} }
DEPTHOR: Depth Enhancement from a Practical Light-Weight dToF Sensor and RGB Image: Jijun Xiang,

Xuan Zhu,

Xianqi Wang,

Yu Wang,

Hong Zhang,

Fei Guo,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV, author = {Xiang, Jijun and Zhu, Xuan and Wang, Xianqi and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin}, title = {DEPTHOR: Depth Enhancement from a Practical Light-Weight dToF Sensor and RGB Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6101-6111} }
Describe Anything: Detailed Localized Image and Video Captioning: Long Lian,

Yifan Ding,

Yunhao Ge,

Sifei Liu,

Hanzi Mao,

Boyi Li,

Marco Pavone,

Ming-Yu Liu,

Trevor Darrell,

Adam Yala,

Yin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2025_ICCV, author = {Lian, Long and Ding, Yifan and Ge, Yunhao and Liu, Sifei and Mao, Hanzi and Li, Boyi and Pavone, Marco and Liu, Ming-Yu and Darrell, Trevor and Yala, Adam and Cui, Yin}, title = {Describe Anything: Detailed Localized Image and Video Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21766-21777} }
Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner: Zhimin Chen,

Xuewei Chen,

Xiao Guo,

Yingwei Li,

Longlong Jing,

Liang Yang,

Bing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Zhimin and Chen, Xuewei and Guo, Xiao and Li, Yingwei and Jing, Longlong and Yang, Liang and Li, Bing}, title = {Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27618-27629} }
Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation: Jie Liu,

Jiayi Shen,

Pan Zhou,

Jan-Jakob Sonke,

Efstratios Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Jie and Shen, Jiayi and Zhou, Pan and Sonke, Jan-Jakob and Gavves, Efstratios}, title = {Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21155-21165} }
KV-Edit: Training-Free Image Editing for Precise Background Preservation: Tianrui Zhu,

Shiyi Zhang,

Jiawei Shao,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV, author = {Zhu, Tianrui and Zhang, Shiyi and Shao, Jiawei and Tang, Yansong}, title = {KV-Edit: Training-Free Image Editing for Precise Background Preservation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16607-16617} }
Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process: Yuanze Li,

Shihao Yuan,

Haolin Wang,

Qizhang Li,

Ming Liu,

Chen Xu,

Guangming Shi,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Yuanze and Yuan, Shihao and Wang, Haolin and Li, Qizhang and Liu, Ming and Xu, Chen and Shi, Guangming and Zuo, Wangmeng}, title = {Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21917-21926} }
When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection: Bo-Lun Huang,

Zi-Xiang Ni,

Feng-Kai Huang,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV, author = {Huang, Bo-Lun and Ni, Zi-Xiang and Huang, Feng-Kai and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27917-27926} }
Dream-to-Recon: Monocular 3D Reconstruction with Diffusion-Depth Distillation from Single Images: Philipp Wulff,

Felix Wimbauer,

Dominik Muhle,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Wulff_2025_ICCV, author = {Wulff, Philipp and Wimbauer, Felix and Muhle, Dominik and Cremers, Daniel}, title = {Dream-to-Recon: Monocular 3D Reconstruction with Diffusion-Depth Distillation from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9352-9362} }
Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation: Junhao Xiao,

Yang Wei,

Jingyu Wang,

Yongchao Wang,

Xiuli Bi,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_ICCV, author = {Xiao, Junhao and Wei, Yang and Wang, Jingyu and Wang, Yongchao and Bi, Xiuli and Xiao, Bin}, title = {Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24413-24422} }
SplArt: Articulation Estimation and Part-Level Reconstruction with 3D Gaussian Splatting: Shengjie Lin,

Jiading Fang,

Muhammad Zubair Irshad,

Vitor Campagnolo Guizilini,

Rares Andrei Ambrus,

Greg Shakhnarovich,

Matthew R. Walter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV, author = {Lin, Shengjie and Fang, Jiading and Irshad, Muhammad Zubair and Guizilini, Vitor Campagnolo and Ambrus, Rares Andrei and Shakhnarovich, Greg and Walter, Matthew R.}, title = {SplArt: Articulation Estimation and Part-Level Reconstruction with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8841-8851} }
FedXDS: Leveraging Model Attribution Methods to counteract Data Heterogeneity in Federated Learning: Maximilian Andreas Hoefler,

Karsten Mueller,

Wojciech Samek; [pdf] [supp]
[bibtex]
@InProceedings{Hoefler_2025_ICCV, author = {Hoefler, Maximilian Andreas and Mueller, Karsten and Samek, Wojciech}, title = {FedXDS: Leveraging Model Attribution Methods to counteract Data Heterogeneity in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4572-4581} }
Retinex-MEF: Retinex-based Glare Effects Aware Unsupervised Multi-Exposure Image Fusion: Haowen Bai,

Jiangshe Zhang,

Zixiang Zhao,

Lilun Deng,

Yukun Cui,

Shuang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_ICCV, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Deng, Lilun and Cui, Yukun and Xu, Shuang}, title = {Retinex-MEF: Retinex-based Glare Effects Aware Unsupervised Multi-Exposure Image Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7251-7261} }
Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation: Guanyi Qin,

Ziyue Wang,

Daiyun Shen,

Haofeng Liu,

Hantao Zhou,

Junde Wu,

Runze Hu,

Yueming Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV, author = {Qin, Guanyi and Wang, Ziyue and Shen, Daiyun and Liu, Haofeng and Zhou, Hantao and Wu, Junde and Hu, Runze and Jin, Yueming}, title = {Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14431-14442} }
Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection: Yupeng Hu,

Changxing Ding,

Chang Sun,

Shaoli Huang,

Xiangmin Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV, author = {Hu, Yupeng and Ding, Changxing and Sun, Chang and Huang, Shaoli and Xu, Xiangmin}, title = {Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20126-20136} }
GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR: Christophe Bolduc,

Yannick Hold-Geoffroy,

Jean-François Lalonde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bolduc_2025_ICCV, author = {Bolduc, Christophe and Hold-Geoffroy, Yannick and Lalonde, Jean-Fran\c{c}ois}, title = {GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29120-29130} }
Hate in Plain Sight: On the Risks of Moderating AI-Generated Hateful Illusions: Yiting Qu,

Ziqing Yang,

Yihan Ma,

Michael Backes,

Savvas Zannettou,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV, author = {Qu, Yiting and Yang, Ziqing and Ma, Yihan and Backes, Michael and Zannettou, Savvas and Zhang, Yang}, title = {Hate in Plain Sight: On the Risks of Moderating AI-Generated Hateful Illusions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19617-19627} }
Steering Guidance for Personalized Text-to-Image Diffusion Models: Sunghyun Park,

Seokeon Choi,

Hyoungwoo Park,

Sungrack Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV, author = {Park, Sunghyun and Choi, Seokeon and Park, Hyoungwoo and Yun, Sungrack}, title = {Steering Guidance for Personalized Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15907-15916} }
Improving Rectified Flow with Boundary Conditions: Xixi Hu,

Runlong Liao,

Keyang Xu,

Bo Liu,

Yeqing Li,

Eugene Ie,

Hongliang Fei,

Qiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV, author = {Hu, Xixi and Liao, Runlong and Xu, Keyang and Liu, Bo and Li, Yeqing and Ie, Eugene and Fei, Hongliang and Liu, Qiang}, title = {Improving Rectified Flow with Boundary Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18177-18186} }
ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction: Juan Yeo,

Soonwoo Cha,

Jiwoo Song,

Hyunbin Jin,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2025_ICCV, author = {Yeo, Juan and Cha, Soonwoo and Song, Jiwoo and Jin, Hyunbin and Kim, Taesup}, title = {ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20390-20400} }
Multimodal Prompt Alignment for Facial Expression Recognition: Fuyan Ma,

Yiran He,

Bin Sun,

Shutao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV, author = {Ma, Fuyan and He, Yiran and Sun, Bin and Li, Shutao}, title = {Multimodal Prompt Alignment for Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12581-12591} }
Laboring on less labors: RPCA Paradigm for Pan-sharpening: Honghui Xu,

Chuangjie Fang,

Yibin Wang,

Jie Wu,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV, author = {Xu, Honghui and Fang, Chuangjie and Wang, Yibin and Wu, Jie and Zheng, Jianwei}, title = {Laboring on less labors: RPCA Paradigm for Pan-sharpening}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11393-11402} }
MVQA: Mamba with Unified Sampling for Efficient Video Quality Assessment: Yachun Mi,

Yu Li,

Weicheng Meng,

Chaofeng Chen,

Chen Hui,

Shaohui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2025_ICCV, author = {Mi, Yachun and Li, Yu and Meng, Weicheng and Chen, Chaofeng and Hui, Chen and Liu, Shaohui}, title = {MVQA: Mamba with Unified Sampling for Efficient Video Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18498-18509} }
Semantic-guided Camera Ray Regression for Visual Localization: Yesheng Zhang,

Xu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yesheng and Zhao, Xu}, title = {Semantic-guided Camera Ray Regression for Visual Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25639-25648} }
HAMSt3R: Human-Aware Multi-view Stereo 3D Reconstruction: Sara Rojas,

Matthieu Armando,

Bernard Ghanem,

Philippe Weinzaepfel,

Vincent Leroy,

Grégory Rogez; [pdf] [supp]
[bibtex]
@InProceedings{Rojas_2025_ICCV, author = {Rojas, Sara and Armando, Matthieu and Ghanem, Bernard and Weinzaepfel, Philippe and Leroy, Vincent and Rogez, Gr\'egory}, title = {HAMSt3R: Human-Aware Multi-view Stereo 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5027-5037} }
TrajectoryCrafter: Redirecting Camera Trajectory for Monocular Videos via Diffusion Models: Mark Yu,

Wenbo Hu,

Jinbo Xing,

Ying Shan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV, author = {Yu, Mark and Hu, Wenbo and Xing, Jinbo and Shan, Ying}, title = {TrajectoryCrafter: Redirecting Camera Trajectory for Monocular Videos via Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {100-111} }
AV-Flow: Transforming Text to Audio-Visual Human-like Interactions: Aggelina Chatziagapi,

Louis-Philippe Morency,

Hongyu Gong,

Michael Zollhöfer,

Dimitris Samaras,

Alexander Richard; [pdf] [supp]
[bibtex]
@InProceedings{Chatziagapi_2025_ICCV, author = {Chatziagapi, Aggelina and Morency, Louis-Philippe and Gong, Hongyu and Zollh\"ofer, Michael and Samaras, Dimitris and Richard, Alexander}, title = {AV-Flow: Transforming Text to Audio-Visual Human-like Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14270-14282} }
Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing: Jeongmin Yu,

Susang Kim,

Kisu Lee,

Taekyoung Kwon,

Won-Yong Shin,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV, author = {Yu, Jeongmin and Kim, Susang and Lee, Kisu and Kwon, Taekyoung and Shin, Won-Yong and Kim, Ha Young}, title = {Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21117-21128} }
Self-Supervised Monocular 4D Scene Reconstruction for Egocentric Videos: Chengbo Yuan,

Geng Chen,

Li Yi,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV, author = {Yuan, Chengbo and Chen, Geng and Yi, Li and Gao, Yang}, title = {Self-Supervised Monocular 4D Scene Reconstruction for Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8863-8874} }
What we need is explicit controllability: Training 3D gaze estimator using only facial images: Tingwei Li,

Jun Bao,

Zhenzhong Kuang,

Buyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Tingwei and Bao, Jun and Kuang, Zhenzhong and Liu, Buyu}, title = {What we need is explicit controllability: Training 3D gaze estimator using only facial images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11414-11424} }
WaveMamba: Wavelet-Driven Mamba Fusion for RGB-Infrared Object Detection: Haodong Zhu,

Wenhao Dong,

Linlin Yang,

Hong Li,

Yuguang Yang,

Yangyang Ren,

Qingcheng Zhu,

Zichao Feng,

Changbai Li,

Shaohui Lin,

Runqi Wang,

Xiaoyan Luo,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV, author = {Zhu, Haodong and Dong, Wenhao and Yang, Linlin and Li, Hong and Yang, Yuguang and Ren, Yangyang and Zhu, Qingcheng and Feng, Zichao and Li, Changbai and Lin, Shaohui and Wang, Runqi and Luo, Xiaoyan and Zhang, Baochang}, title = {WaveMamba: Wavelet-Driven Mamba Fusion for RGB-Infrared Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11219-11229} }
CoST: Efficient Collaborative Perception From Unified Spatiotemporal Perspective: Zongheng Tang,

Yi Liu,

Yifan Sun,

Yulu Gao,

Jinyu Chen,

Runsheng Xu,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV, author = {Tang, Zongheng and Liu, Yi and Sun, Yifan and Gao, Yulu and Chen, Jinyu and Xu, Runsheng and Liu, Si}, title = {CoST: Efficient Collaborative Perception From Unified Spatiotemporal Perspective}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1120-1129} }
GCRayDiffusion: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion: Li-Heng Chen,

Zi-Xin Zou,

Chang Liu,

Tianjiao Jing,

Yan-Pei Cao,

Shi-Sheng Huang,

Hongbo Fu,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Li-Heng and Zou, Zi-Xin and Liu, Chang and Jing, Tianjiao and Cao, Yan-Pei and Huang, Shi-Sheng and Fu, Hongbo and Huang, Hua}, title = {GCRayDiffusion: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25335-25345} }
AU-Blendshape for Fine-grained Stylized 3D Facial Expression Manipulation: Hao Li,

Ju Dai,

Feng Zhou,

Kaida Ning,

Lei Li,

Junjun Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Hao and Dai, Ju and Zhou, Feng and Ning, Kaida and Li, Lei and Pan, Junjun}, title = {AU-Blendshape for Fine-grained Stylized 3D Facial Expression Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12605-12614} }
Noise-Modeled Diffusion Models for Low-Light Spike Image Restoration: Ruonan Liu,

Lin Zhu,

Xijie Xiang,

Lizhi Wang,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Ruonan and Zhu, Lin and Xiang, Xijie and Wang, Lizhi and Huang, Hua}, title = {Noise-Modeled Diffusion Models for Low-Light Spike Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4080-4089} }
Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis: Peng Zheng,

Junke Wang,

Yi Chang,

Yizhou Yu,

Rui Ma,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV, author = {Zheng, Peng and Wang, Junke and Chang, Yi and Yu, Yizhou and Ma, Rui and Wu, Zuxuan}, title = {Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17390-17400} }
MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm: Ziyan Guo,

Zeyu Hu,

De Wen Soh,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV, author = {Guo, Ziyan and Hu, Zeyu and Soh, De Wen and Zhao, Na}, title = {MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13869-13879} }
Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis: Byung Hyun Lee,

Wongi Jeong,

Woojae Han,

Kyoungbun Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV, author = {Lee, Byung Hyun and Jeong, Wongi and Han, Woojae and Lee, Kyoungbun and Chun, Se Young}, title = {Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23232-23242} }
DisCoRD: Discrete Tokens to Continuous Motion via Rectified Flow Decoding: Jungbin Cho,

Junwan Kim,

Jisoo Kim,

Minseo Kim,

Mingu Kang,

Sungeun Hong,

Tae-Hyun Oh,

Youngjae Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV, author = {Cho, Jungbin and Kim, Junwan and Kim, Jisoo and Kim, Minseo and Kang, Mingu and Hong, Sungeun and Oh, Tae-Hyun and Yu, Youngjae}, title = {DisCoRD: Discrete Tokens to Continuous Motion via Rectified Flow Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14602-14612} }
TREAD: Token Routing for Efficient Architecture-agnostic Diffusion Training: Felix Krause,

Timy Phan,

Ming Gui,

Stefan Andreas Baumann,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krause_2025_ICCV,
  author    = {Krause, Felix and Phan, Timy and Gui, Ming and Baumann, Stefan Andreas and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {{TREAD}: Token Routing for Efficient Architecture-agnostic Diffusion Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15703--15713},
}
Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension: Juntao Chen,

Wen Shen,

Zhihua Wei,

Lijun Sun,

Hongyun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Juntao and Shen, Wen and Wei, Zhihua and Sun, Lijun and Zhang, Hongyun},
  title     = {Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20413--20424},
}
Controllable Weather Synthesis and Removal with Video Diffusion Models: Chih-Hao Lin,

Zian Wang,

Ruofan Liang,

Yuxuan Zhang,

Sanja Fidler,

Shenlong Wang,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chih-Hao and Wang, Zian and Liang, Ruofan and Zhang, Yuxuan and Fidler, Sanja and Wang, Shenlong and Gojcic, Zan},
  title     = {Controllable Weather Synthesis and Removal with Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13580--13591},
}
Bias in Gender Bias Benchmarks: How Spurious Features Distort Evaluation: Yusuke Hirota,

Ryo Hachiuma,

Boyi Li,

Ximing Lu,

Michael Ross Boone,

Boris Ivanovic,

Yejin Choi,

Marco Pavone,

Yu-Chiang Frank Wang,

Noa Garcia,

Yuta Nakashima,

Chao-Han Huck Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hirota_2025_ICCV,
  author    = {Hirota, Yusuke and Hachiuma, Ryo and Li, Boyi and Lu, Ximing and Boone, Michael Ross and Ivanovic, Boris and Choi, Yejin and Pavone, Marco and Wang, Yu-Chiang Frank and Garcia, Noa and Nakashima, Yuta and Yang, Chao-Han Huck},
  title     = {Bias in Gender Bias Benchmarks: How Spurious Features Distort Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8634--8644},
}
Clink! Chop! Thud! - Learning Object Sounds from Real-World Interactions: Mengyu Yang,

Yiming Chen,

Haozheng Pei,

Siddhant Agarwal,

Arun Balajee Vasudevan,

James Hays; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Mengyu and Chen, Yiming and Pei, Haozheng and Agarwal, Siddhant and Vasudevan, Arun Balajee and Hays, James},
  title     = {Clink! {Chop}! {Thud}! - Learning Object Sounds from Real-World Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14549--14558},
}
Instant GaussianImage: A Generalizable and Self-Adaptive Image Representation via 2D Gaussian Splatting: Zhaojie Zeng,

Yuesong Wang,

Tao Guan,

Chao Yang,

Lili Ju; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Zhaojie and Wang, Yuesong and Guan, Tao and Yang, Chao and Ju, Lili},
  title     = {Instant {GaussianImage}: A Generalizable and Self-Adaptive Image Representation via {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27896--27905},
}
Learning to Inference Adaptively for Multimodal Large Language Models: Zhuoyan Xu,

Khoi Duc Nguyen,

Preeti Mukherjee,

Saurabh Bagchi,

Somali Chaterji,

Yingyu Liang,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhuoyan and Nguyen, Khoi Duc and Mukherjee, Preeti and Bagchi, Saurabh and Chaterji, Somali and Liang, Yingyu and Li, Yin},
  title     = {Learning to Inference Adaptively for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3552--3563},
}
HazeFlow: Revisit Haze Physical Model as ODE and Non-Homogeneous Haze Generation for Real-World Dehazing: Junseong Shin,

Seungwoo Chung,

Yunjeong Yang,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Junseong and Chung, Seungwoo and Yang, Yunjeong and Kim, Tae Hyun},
  title     = {{HazeFlow}: Revisit Haze Physical Model as {ODE} and Non-Homogeneous Haze Generation for Real-World Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6263--6272},
}
AcZeroTS: Active Learning for Zero-shot Tissue Segmentation in Pathology Images: Jiao Tang,

Junjie Zhou,

Bo Qian,

Peng Wan,

Yingli Zuo,

Wei Shao,

Daoqiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiao and Zhou, Junjie and Qian, Bo and Wan, Peng and Zuo, Yingli and Shao, Wei and Zhang, Daoqiang},
  title     = {{AcZeroTS}: Active Learning for Zero-shot Tissue Segmentation in Pathology Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23508--23518},
}
FaceShield: Defending Facial Image against Deepfake Threats: Jaehwan Jeong,

Sumin In,

Sieun Kim,

Hannie Shin,

Jongheon Jeong,

Sang Ho Yoon,

Jaewook Chung,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Jaehwan and In, Sumin and Kim, Sieun and Shin, Hannie and Jeong, Jongheon and Yoon, Sang Ho and Chung, Jaewook and Kim, Sangpil},
  title     = {{FaceShield}: Defending Facial Image against Deepfake Threats},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10364--10374},
}
SEAL: Semantic Aware Image Watermarking: Kasra Arabi,

R. Teal Witter,

Chinmay Hegde,

Niv Cohen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Arabi_2025_ICCV,
  author    = {Arabi, Kasra and Witter, R. Teal and Hegde, Chinmay and Cohen, Niv},
  title     = {{SEAL}: Semantic Aware Image Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16196--16205},
}
Modeling Saliency Dataset Bias: Matthias Kümmerer,

Harneet Singh Khanuja,

Matthias Bethge; [pdf] [supp]
[bibtex]
@InProceedings{Kummerer_2025_ICCV,
  author    = {K{\"u}mmerer, Matthias and Khanuja, Harneet Singh and Bethge, Matthias},
  title     = {Modeling Saliency Dataset Bias},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22077--22088},
}
MCOP: Multi-UAV Collaborative Occupancy Prediction: Zefu Lin,

Wenbo Chen,

Xiaojuan Jin,

Yuran Yang,

Lue Fan,

Yixin Zhang,

Yufeng Zhang,

Zhaoxiang Zhang; [pdf]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zefu and Chen, Wenbo and Jin, Xiaojuan and Yang, Yuran and Fan, Lue and Zhang, Yixin and Zhang, Yufeng and Zhang, Zhaoxiang},
  title     = {{MCOP}: {Multi-UAV} Collaborative Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27242--27251},
}
Forecasting Continuous Non-Conservative Dynamical Systems in SO(3): Lennart Bastian,

Mohammad Rashed,

Nassir Navab,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bastian_2025_ICCV,
  author    = {Bastian, Lennart and Rashed, Mohammad and Navab, Nassir and Birdal, Tolga},
  title     = {Forecasting Continuous Non-Conservative Dynamical Systems in {SO(3)}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14845--14855},
}
HoliTracer: Holistic Vectorization of Geographic Objects from Large-Size Remote Sensing Imagery: Yu Wang,

Bo Dang,

Wanchun Li,

Wei Chen,

Yansheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yu and Dang, Bo and Li, Wanchun and Chen, Wei and Li, Yansheng},
  title     = {{HoliTracer}: Holistic Vectorization of Geographic Objects from Large-Size Remote Sensing Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8482--8491},
}
Tensor-aggregated LoRA in Federated Fine-tuning: Zhixuan Li,

Binqian Xu,

Xiangbo Shu,

Jiachao Zhang,

Yazhou Yao,

Guo-Sen Xie,

Jinhui Tang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Xu, Binqian and Shu, Xiangbo and Zhang, Jiachao and Yao, Yazhou and Xie, Guo-Sen and Tang, Jinhui},
  title     = {Tensor-aggregated {LoRA} in Federated Fine-tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1058--1067},
}
TextSSR: Diffusion-based Data Synthesis for Scene Text Recognition: Xingsong Ye,

Yongkun Du,

Yunbo Tao,

Zhineng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Xingsong and Du, Yongkun and Tao, Yunbo and Chen, Zhineng},
  title     = {{TextSSR}: Diffusion-based Data Synthesis for Scene Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17464--17473},
}
OVA-Fields: Weakly Supervised Open-Vocabulary Affordance Fields for Robot Operational Part Detection: Heng Su,

Mengying Xie,

Nieqing Cao,

Yan Ding,

Beichen Shao,

Xianlei Long,

Fuqiang Gu,

Chao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Heng and Xie, Mengying and Cao, Nieqing and Ding, Yan and Shao, Beichen and Long, Xianlei and Gu, Fuqiang and Chen, Chao},
  title     = {{OVA-Fields}: Weakly Supervised Open-Vocabulary Affordance Fields for Robot Operational Part Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6385--6395},
}
Tree Skeletonization from 3D Point Clouds by Denoising Diffusion: Elias Ariel Marks,

Lucas Nunes,

Federico Magistri,

Matteo Sodano,

Rodrigo Marcuzzi,

Lars Zimmermann,

Jens Behley,

Cyrill Stachniss; [pdf] [supp]
[bibtex]
@InProceedings{Marks_2025_ICCV,
  author    = {Marks, Elias Ariel and Nunes, Lucas and Magistri, Federico and Sodano, Matteo and Marcuzzi, Rodrigo and Zimmermann, Lars and Behley, Jens and Stachniss, Cyrill},
  title     = {Tree Skeletonization from {3D} Point Clouds by Denoising Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27607--27617},
}
DiMPLe - Disentangled Multi-Modal Prompt Learning: Enhancing Out-Of-Distribution Alignment with Invariant and Spurious Feature Separation: Umaima Rahman,

Mohammad Yaqub,

Dwarikanath Mahapatra; [pdf] [supp]
[bibtex]
@InProceedings{Rahman_2025_ICCV,
  author    = {Rahman, Umaima and Yaqub, Mohammad and Mahapatra, Dwarikanath},
  title     = {{DiMPLe} - Disentangled Multi-Modal Prompt Learning: Enhancing Out-Of-Distribution Alignment with Invariant and Spurious Feature Separation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1634--1643},
}
From Panels to Prose: Generating Literary Narratives from Comics: Ragav Sachdeva,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2025_ICCV,
  author    = {Sachdeva, Ragav and Zisserman, Andrew},
  title     = {From Panels to Prose: Generating Literary Narratives from Comics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21864--21873},
}
Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues: Francesco Taioli,

Edoardo Zorzi,

Gianni Franchi,

Alberto Castellini,

Alessandro Farinelli,

Marco Cristani,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taioli_2025_ICCV,
  author    = {Taioli, Francesco and Zorzi, Edoardo and Franchi, Gianni and Castellini, Alberto and Farinelli, Alessandro and Cristani, Marco and Wang, Yiming},
  title     = {Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18781--18792},
}
VMem: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory: Runjia Li,

Philip Torr,

Andrea Vedaldi,

Tomas Jakab; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Runjia and Torr, Philip and Vedaldi, Andrea and Jakab, Tomas},
  title     = {{VMem}: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25690--25699},
}
Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity: Mingyuan Sun,

Zheng Fang,

Jiaxu Wang,

Kunyi Zhang,

Qiang Zhang,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Mingyuan and Fang, Zheng and Wang, Jiaxu and Zhang, Kunyi and Zhang, Qiang and Xu, Renjing},
  title     = {Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28473--28482},
}
X-Capture: An Open-Source Portable Device for Multi-Sensory Learning: Samuel Clarke,

Suzannah Wistreich,

Yanjie Ze,

Jiajun Wu; [pdf] [supp]
[bibtex]
@InProceedings{Clarke_2025_ICCV,
  author    = {Clarke, Samuel and Wistreich, Suzannah and Ze, Yanjie and Wu, Jiajun},
  title     = {{X-Capture}: An Open-Source Portable Device for Multi-Sensory Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6436--6446},
}
Towards Accurate and Efficient 3D Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge: Linshen Liu,

Boyan Su,

Junyue Jiang,

Guanlin Wu,

Cong Guo,

Ceyu Xu,

Hao Frank Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Linshen and Su, Boyan and Jiang, Junyue and Wu, Guanlin and Guo, Cong and Xu, Ceyu and Yang, Hao Frank},
  title     = {Towards Accurate and Efficient {3D} Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25903--25913},
}
UrbanLLaVA: A Multi-modal Large Language Model for Urban Intelligence: Jie Feng,

Shengyuan Wang,

Tianhui Liu,

Yanxin Xi,

Yong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Jie and Wang, Shengyuan and Liu, Tianhui and Xi, Yanxin and Li, Yong},
  title     = {{UrbanLLaVA}: A Multi-modal Large Language Model for Urban Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6209--6219},
}
TITAN-Guide: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models: Christian Simon,

Masato Ishii,

Akio Hayakawa,

Zhi Zhong,

Shusuke Takahashi,

Takashi Shibuya,

Yuki Mitsufuji; [pdf] [supp]
[bibtex]
@InProceedings{Simon_2025_ICCV,
  author    = {Simon, Christian and Ishii, Masato and Hayakawa, Akio and Zhong, Zhi and Takahashi, Shusuke and Shibuya, Takashi and Mitsufuji, Yuki},
  title     = {{TITAN-Guide}: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16662--16671},
}
ART: Adaptive Relation Tuning for Generalized Relation Prediction: Gopika Sudhakaran,

Hikaru Shindo,

Patrick Schramowski,

Simone Schaub-Meyer,

Kristian Kersting,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sudhakaran_2025_ICCV,
  author    = {Sudhakaran, Gopika and Shindo, Hikaru and Schramowski, Patrick and Schaub-Meyer, Simone and Kersting, Kristian and Roth, Stefan},
  title     = {{ART}: Adaptive Relation Tuning for Generalized Relation Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16323--16332},
}
VLM4D: Towards Spatiotemporal Awareness in Vision Language Models: Shijie Zhou,

Alexander Vilesov,

Xuehai He,

Ziyu Wan,

Shuwang Zhang,

Aditya Nagachandra,

Di Chang,

Dongdong Chen,

Xin Eric Wang,

Achuta Kadambi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Shijie and Vilesov, Alexander and He, Xuehai and Wan, Ziyu and Zhang, Shuwang and Nagachandra, Aditya and Chang, Di and Chen, Dongdong and Wang, Xin Eric and Kadambi, Achuta},
  title     = {{VLM4D}: Towards Spatiotemporal Awareness in Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8600--8612},
}
DimensionX: Create Any 3D and 4D Scenes from a Single Image with Decoupled Video Diffusion: Wenqiang Sun,

Shuo Chen,

Fangfu Liu,

Zilong Chen,

Yueqi Duan,

Jun Zhu,

Jun Zhang,

Yikai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Wenqiang and Chen, Shuo and Liu, Fangfu and Chen, Zilong and Duan, Yueqi and Zhu, Jun and Zhang, Jun and Wang, Yikai},
  title     = {{DimensionX}: Create Any {3D} and {4D} Scenes from a Single Image with Decoupled Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13695--13706},
}
Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models: Xiao Liang,

Di Wang,

Zhicheng Jiao,

Ronghan Li,

Pengfei Yang,

Quan Wang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiao and Wang, Di and Jiao, Zhicheng and Li, Ronghan and Yang, Pengfei and Wang, Quan and Chua, Tat-Seng},
  title     = {Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21144--21154},
}
Mitigating Geometric Degradation in Fast DownSampling via FastAdapter for Point Cloud Segmentation: Shuofeng Sun,

Haibin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shuofeng and Yan, Haibin},
  title     = {Mitigating Geometric Degradation in Fast {DownSampling} via {FastAdapter} for Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25983--25992},
}
Dual-S3D: Hierarchical Dual-Path Selective SSM-CNN for High-Fidelity Implicit Reconstruction: Luoxi Zhang,

Pragyan Shrestha,

Yu Zhou,

Chun Xie,

Itaru Kitahara; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Luoxi and Shrestha, Pragyan and Zhou, Yu and Xie, Chun and Kitahara, Itaru},
  title     = {{Dual-S3D}: Hierarchical Dual-Path Selective {SSM-CNN} for High-Fidelity Implicit Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25104--25113},
}
SparseFlex: High-Resolution and Arbitrary-Topology 3D Shape Modeling: Xianglong He,

Zi-Xin Zou,

Chia-Hao Chen,

Yuan-Chen Guo,

Ding Liang,

Chun Yuan,

Wanli Ouyang,

Yan-Pei Cao,

Yangguang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Xianglong and Zou, Zi-Xin and Chen, Chia-Hao and Guo, Yuan-Chen and Liang, Ding and Yuan, Chun and Ouyang, Wanli and Cao, Yan-Pei and Li, Yangguang},
  title     = {{SparseFlex}: High-Resolution and Arbitrary-Topology {3D} Shape Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14822--14833},
}
Multimodal LLM Guided Exploration and Active Mapping using Fisher Information: Wen Jiang,

Boshu Lei,

Katrina Ashton,

Kostas Daniilidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Wen and Lei, Boshu and Ashton, Katrina and Daniilidis, Kostas},
  title     = {Multimodal {LLM} Guided Exploration and Active Mapping using {Fisher} Information},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5392--5404},
}
Lidar Waveforms are Worth 40x128x33 Words: Dominik Scheuble,

Hanno Holzhüter,

Steven Peters,

Mario Bijelic,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Scheuble_2025_ICCV,
  author    = {Scheuble, Dominik and Holzh{\"u}ter, Hanno and Peters, Steven and Bijelic, Mario and Heide, Felix},
  title     = {Lidar Waveforms are Worth 40x128x33 Words},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28913--28924},
}
LoRA.rar: Learning to Merge LoRAs via Hypernetworks for Subject-Style Conditioned Image Generation: Donald Shenaj,

Ondrej Bohdal,

Mete Ozay,

Pietro Zanuttigh,

Umberto Michieli; [pdf] [supp]
[bibtex]
@InProceedings{Shenaj_2025_ICCV,
  author    = {Shenaj, Donald and Bohdal, Ondrej and Ozay, Mete and Zanuttigh, Pietro and Michieli, Umberto},
  title     = {{LoRA.rar}: Learning to Merge {LoRAs} via Hypernetworks for Subject-Style Conditioned Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16132--16142},
}
X-Fusion: Introducing New Modality to Frozen Large Language Models: Sicheng Mo,

Thao Nguyen,

Xun Huang,

Siddharth Srinivasan Iyer,

Yijun Li,

Yuchen Liu,

Abhishek Tandon,

Eli Shechtman,

Krishna Kumar Singh,

Yong Jae Lee,

Bolei Zhou,

Yuheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Mo_2025_ICCV,
  author    = {Mo, Sicheng and Nguyen, Thao and Huang, Xun and Iyer, Siddharth Srinivasan and Li, Yijun and Liu, Yuchen and Tandon, Abhishek and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng},
  title     = {{X-Fusion}: Introducing New Modality to Frozen Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {228--238},
}
KOEnsAttack: Towards Efficient Data-Free Black-Box Adversarial Attacks via Knowledge-Orthogonalized Substitute Ensembles: Chaoyong Yang,

Jia-Li Yin,

Bin Chen,

Zhaozhe Hu,

Xiaolei Liu,

Wei Lin; [pdf]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chaoyong and Yin, Jia-Li and Chen, Bin and Hu, Zhaozhe and Liu, Xiaolei and Lin, Wei},
  title     = {{KOEnsAttack}: Towards Efficient Data-Free Black-Box Adversarial Attacks via Knowledge-Orthogonalized Substitute Ensembles},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3101--3110},
}
Multimodal Large Language Model-Guided ISP Hyperparameter Optimization with Dynamic Preference Learning: Xinyu Sun,

Zhikun Zhao,

Congyan Lang,

Bing Li,

Juan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xinyu and Zhao, Zhikun and Lang, Congyan and Li, Bing and Wang, Juan},
  title     = {Multimodal Large Language Model-Guided {ISP} Hyperparameter Optimization with Dynamic Preference Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {437--446},
}
SALAD -- Semantics-Aware Logical Anomaly Detection: Matic Fučka,

Vitjan Zavrtanik,

Danijel Skočaj; [pdf] [supp]
[bibtex]
@InProceedings{Fucka_2025_ICCV,
  author    = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel},
  title     = {{SALAD} -- Semantics-Aware Logical Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21843--21852},
}
Federated Continuous Category Discovery and Learning: Lixu Wang,

Chenxi Liu,

Junfeng Guo,

Qingqing Ye,

Heng Huang,

Haibo Hu,

Wei Dong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Lixu and Liu, Chenxi and Guo, Junfeng and Ye, Qingqing and Huang, Heng and Hu, Haibo and Dong, Wei},
  title     = {Federated Continuous Category Discovery and Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2429--2439},
}
Motion Synthesis with Sparse and Flexible Keyjoint Control: Inwoo Hwang,

Jinseok Bae,

Donggeun Lim,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inwoo and Bae, Jinseok and Lim, Donggeun and Kim, Young Min},
  title     = {Motion Synthesis with Sparse and Flexible Keyjoint Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13203--13213},
}
EVOLVE: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation: Jong-Hyeon Baek,

Jiwon Oh,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Baek_2025_ICCV,
  author    = {Baek, Jong-Hyeon and Oh, Jiwon and Koh, Yeong Jun},
  title     = {{EVOLVE}: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11273--11282},
}
ARGUS: Hallucination and Omission Evaluation in Video-LLMs: Ruchit Rawal,

Reza Shirkavand,

Heng Huang,

Gowthami Somepalli,

Tom Goldstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawal_2025_ICCV,
  author    = {Rawal, Ruchit and Shirkavand, Reza and Huang, Heng and Somepalli, Gowthami and Goldstein, Tom},
  title     = {{ARGUS}: Hallucination and Omission Evaluation in {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20280--20290},
}
DiTaiListener: Controllable High Fidelity Listener Video Generation with Diffusion: Maksim Siniukov,

Di Chang,

Minh Tran,

Hongkun Gong,

Ashutosh Chaubey,

Mohammad Soleymani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Siniukov_2025_ICCV,
  author    = {Siniukov, Maksim and Chang, Di and Tran, Minh and Gong, Hongkun and Chaubey, Ashutosh and Soleymani, Mohammad},
  title     = {{DiTaiListener}: Controllable High Fidelity Listener Video Generation with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11991--12001},
}
Humans as a Calibration Pattern: Dynamic 3D Scene Reconstruction from Unsynchronized and Uncalibrated Videos: Changwoon Choi,

Jeongjun Kim,

Geonho Cha,

Minkwan Kim,

Dongyoon Wee,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Changwoon and Kim, Jeongjun and Cha, Geonho and Kim, Minkwan and Wee, Dongyoon and Kim, Young Min},
  title     = {Humans as a Calibration Pattern: Dynamic {3D} Scene Reconstruction from Unsynchronized and Uncalibrated Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6598--6608},
}
ISP2HRNet: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning: Yuanlin Wang,

Ruiqin Xiong,

Rui Zhao,

Jin Wang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Zhao, Rui and Wang, Jin and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{ISP2HRNet}: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11547--11557},
}
Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging: Ying Xue,

Jiaxi Jiang,

Rayan Armani,

Dominik Hollidt,

Yi-Chi Liao,

Christian Holz; [pdf]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Ying and Jiang, Jiaxi and Armani, Rayan and Hollidt, Dominik and Liao, Yi-Chi and Holz, Christian},
  title     = {Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24910--24921},
}
NormalLoc: Visual Localization on Textureless 3D Models using Surface Normals: Jiro Abe,

Gaku Nakano,

Kazumine Ogura; [pdf] [supp]
[bibtex]
@InProceedings{Abe_2025_ICCV,
  author    = {Abe, Jiro and Nakano, Gaku and Ogura, Kazumine},
  title     = {{NormalLoc}: Visual Localization on Textureless {3D} Models using Surface Normals},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25421--25430},
}
Light-A-Video: Training-free Video Relighting via Progressive Light Fusion: Yujie Zhou,

Jiazi Bu,

Pengyang Ling,

Pan Zhang,

Tong Wu,

Qidong Huang,

Jinsong Li,

Xiaoyi Dong,

Yuhang Zang,

Yuhang Cao,

Anyi Rao,

Jiaqi Wang,

Li Niu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yujie and Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Huang, Qidong and Li, Jinsong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Rao, Anyi and Wang, Jiaqi and Niu, Li},
  title     = {{Light-A-Video}: Training-free Video Relighting via Progressive Light Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13315--13325},
}
Decoupled Multi-Predictor Optimization for Inference-Efficient Model Tuning: Liwei Luo,

Shuaitengyuan Li,

Dongwei Ren,

Qilong Wang,

Pengfei Zhu,

Qinghua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Liwei and Li, Shuaitengyuan and Ren, Dongwei and Wang, Qilong and Zhu, Pengfei and Hu, Qinghua},
  title     = {Decoupled Multi-Predictor Optimization for Inference-Efficient Model Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3628--3638},
}
Deterministic Object Pose Confidence Region Estimation: Jinghao Wang,

Zhang Li,

Zi Wang,

Banglei Guan,

Yang Shang,

Qifeng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jinghao and Li, Zhang and Wang, Zi and Guan, Banglei and Shang, Yang and Yu, Qifeng},
  title     = {Deterministic Object Pose Confidence Region Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14866--14875},
}
MedVSR: Medical Video Super-Resolution with Cross State-Space Propagation: Xinyu Liu,

Guolei Sun,

Cheng Wang,

Yixuan Yuan,

Ender Konukoglu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xinyu and Sun, Guolei and Wang, Cheng and Yuan, Yixuan and Konukoglu, Ender},
  title     = {{MedVSR}: Medical Video Super-Resolution with Cross State-Space Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11697--11707},
}
MPBR: Multimodal Progressive Bidirectional Reasoning for Open-Set Fine-Grained Recognition: Junfu Tan,

Peiguang Jing,

Yu Zhu,

Yu Liu; [pdf]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Junfu and Jing, Peiguang and Zhu, Yu and Liu, Yu},
  title     = {{MPBR}: Multimodal Progressive Bidirectional Reasoning for Open-Set Fine-Grained Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1282--1291},
}
Domain-aware Category-level Geometry Learning Segmentation for 3D Point Clouds: Pei He,

Lingling Li,

Licheng Jiao,

Ronghua Shang,

Fang Liu,

Shuang Wang,

Xu Liu,

Wenping Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Pei and Li, Lingling and Jiao, Licheng and Shang, Ronghua and Liu, Fang and Wang, Shuang and Liu, Xu and Ma, Wenping},
  title     = {Domain-aware Category-level Geometry Learning Segmentation for {3D} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28324--28333},
}
Di[M]O: Distilling Masked Diffusion Models into One-step Generator: Yuanzhi Zhu,

Xi Wang,

Stéphane Lathuilière,

Vicky Kalogeiton; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yuanzhi and Wang, Xi and Lathuili{\`e}re, St{\'e}phane and Kalogeiton, Vicky},
  title     = {{Di[M]O}: Distilling Masked Diffusion Models into One-step Generator},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18606--18618},
}
TerraMind: Large-Scale Generative Multimodality for Earth Observation: Johannes Jakubik,

Felix Yang,

Benedikt Blumenstiel,

Erik Scheurer,

Rocco Sedona,

Stefano Maurogiovanni,

Jente Bosmans,

Nikolaos Dionelis,

Valerio Marsocci,

Niklas Kopp,

Rahul Ramachandran,

Paolo Fraccaro,

Thomas Brunschwiler,

Gabriele Cavallaro,

Juan Bernabe-Moreno,

Nicolas Longépé; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jakubik_2025_ICCV,
  author    = {Jakubik, Johannes and Yang, Felix and Blumenstiel, Benedikt and Scheurer, Erik and Sedona, Rocco and Maurogiovanni, Stefano and Bosmans, Jente and Dionelis, Nikolaos and Marsocci, Valerio and Kopp, Niklas and Ramachandran, Rahul and Fraccaro, Paolo and Brunschwiler, Thomas and Cavallaro, Gabriele and Bernabe-Moreno, Juan and Long{\'e}p{\'e}, Nicolas},
  title     = {{TerraMind}: Large-Scale Generative Multimodality for {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7383--7394},
}
An Efficient Post-hoc Framework for Reducing Task Discrepancy of Text Encoders for Composed Image Retrieval: Jaeseok Byun,

Seokhyeon Jeong,

Wonjae Kim,

Sanghyuk Chun,

Taesup Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Byun_2025_ICCV,
  author    = {Byun, Jaeseok and Jeong, Seokhyeon and Kim, Wonjae and Chun, Sanghyuk and Moon, Taesup},
  title     = {An Efficient Post-hoc Framework for Reducing Task Discrepancy of Text Encoders for Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3895--3904},
}
Q-Norm: Robust Representation Learning via Quality-Adaptive Normalization: Lanning Zhang,

Ying Zhou,

Fei Gao,

Ziyun Li,

Maoying Qiao,

Jinlan Xu,

Nannan Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Lanning and Zhou, Ying and Gao, Fei and Li, Ziyun and Qiao, Maoying and Xu, Jinlan and Wang, Nannan},
  title     = {{Q-Norm}: Robust Representation Learning via Quality-Adaptive Normalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13901--13911},
}
VoiceCraft-Dub: Automated Video Dubbing with Neural Codec Language Models: Kim Sung-Bin,

Jeongsoo Choi,

Puyuan Peng,

Joon Son Chung,

Tae-Hyun Oh,

David Harwath; [pdf] [supp]
[bibtex]
@InProceedings{Sung-Bin_2025_ICCV,
  author    = {Sung-Bin, Kim and Choi, Jeongsoo and Peng, Puyuan and Chung, Joon Son and Oh, Tae-Hyun and Harwath, David},
  title     = {{VoiceCraft-Dub}: Automated Video Dubbing with Neural Codec Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14623--14632},
}
SUV: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings: Xiang Lv,

Mingwen Shao,

Lingzhuang Meng,

Chang Liu,

Yecong Wan,

Xinyuan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lv_2025_ICCV,
  author    = {Lv, Xiang and Shao, Mingwen and Meng, Lingzhuang and Liu, Chang and Wan, Yecong and Chen, Xinyuan},
  title     = {{SUV}: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18357--18366},
}
FonTS: Text Rendering With Typography and Style Controls: Wenda Shi,

Yiren Song,

Dengming Zhang,

Jiaming Liu,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Wenda and Song, Yiren and Zhang, Dengming and Liu, Jiaming and Zou, Xingxing},
  title     = {{FonTS}: Text Rendering With Typography and Style Controls},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18463--18474},
}
Improving SAM for Camouflaged Object Detection via Dual Stream Adapters: Jiaming Liu,

Linghe Kong,

Guihai Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiaming and Kong, Linghe and Chen, Guihai},
  title     = {Improving {SAM} for Camouflaged Object Detection via Dual Stream Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21906--21916},
}
DiSCO-3D : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in NeRF: Doriand Petit,

Steve Bourgeois,

Vincent Gay-Bellile,

Florian Chabot,

Loïc Barthe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Petit_2025_ICCV,
  author    = {Petit, Doriand and Bourgeois, Steve and Gay-Bellile, Vincent and Chabot, Florian and Barthe, Lo{\"\i}c},
  title     = {{DiSCO-3D} : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in {NeRF}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20043--20052},
}
PRIMAL: Physically Reactive and Interactive Motor Model for Avatar Learning: Yan Zhang,

Yao Feng,

Alpár Cseke,

Nitin Saini,

Nathan Bajandas,

Nicolas Heron,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yan and Feng, Yao and Cseke, Alp{\'a}r and Saini, Nitin and Bajandas, Nathan and Heron, Nicolas and Black, Michael J.},
  title     = {{PRIMAL}: Physically Reactive and Interactive Motor Model for Avatar Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12725--12736},
}
VCA: Video Curious Agent for Long Video Understanding: Zeyuan Yang,

Delin Chen,

Xueyang Yu,

Maohao Shen,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zeyuan and Chen, Delin and Yu, Xueyang and Shen, Maohao and Gan, Chuang},
  title     = {{VCA}: Video Curious Agent for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20168--20179},
}
Self-Reinforcing Prototype Evolution with Dual-Knowledge Cooperation for Semi-Supervised Lifelong Person Re-Identification: Kunlun Xu,

Fan Zhuo,

Jiangmeng Li,

Xu Zou,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Kunlun and Zhuo, Fan and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan},
  title     = {Self-Reinforcing Prototype Evolution with Dual-Knowledge Cooperation for Semi-Supervised Lifelong Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3564--3574},
}
What to Distill? Fast Knowledge Distillation with Adaptive Sampling: Byungchul Chae,

Seonyeong Heo; [pdf] [supp]
[bibtex]
@InProceedings{Chae_2025_ICCV,
  author    = {Chae, Byungchul and Heo, Seonyeong},
  title     = {What to Distill? {Fast} Knowledge Distillation with Adaptive Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2407--2416},
}
Revisiting Pool-based Prompt Learning for Few-shot Class-incremental Learning: Yongwei Jiang,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yongwei and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Revisiting Pool-based Prompt Learning for Few-shot Class-incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1303--1313},
}
HOMO-Feature: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation: Chenzhong Gao,

Wei Li,

Desheng Weng; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Chenzhong and Li, Wei and Weng, Desheng},
  title     = {{HOMO-Feature}: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10538--10548},
}
ForeSight: Multi-View Streaming Joint Object Detection and Trajectory Forecasting: Sandro Papais,

Letian Wang,

Brian Cheong,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papais_2025_ICCV,
  author    = {Papais, Sandro and Wang, Letian and Cheong, Brian and Waslander, Steven L.},
  title     = {{ForeSight}: Multi-View Streaming Joint Object Detection and Trajectory Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25474--25484},
}
SG-LDM: Semantic-Guided LiDAR Generation via Latent-Aligned Diffusion: Zhengkang Xiang,

Zizhao Li,

Amir Khodabandeh,

Kourosh Khoshelham; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Zhengkang and Li, Zizhao and Khodabandeh, Amir and Khoshelham, Kourosh},
  title     = {{SG-LDM}: Semantic-Guided {LiDAR} Generation via Latent-Aligned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24965--24976},
}
Dynamic Multi-Layer Null Space Projection for Vision-Language Continual Learning: Borui Kang,

Lei Wang,

Zhiping Wu,

Tao Feng,

Yawen Li,

Yang Gao,

Wenbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Borui and Wang, Lei and Wu, Zhiping and Feng, Tao and Li, Yawen and Gao, Yang and Li, Wenbin},
  title     = {Dynamic Multi-Layer Null Space Projection for Vision-Language Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2077--2086},
}
AFUNet: Cross-Iterative Alignment-Fusion Synergy for HDR Reconstruction via Deep Unfolding Paradigm: Xinyue Li,

Zhangkai Ni,

Wenhan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xinyue and Ni, Zhangkai and Yang, Wenhan},
  title     = {{AFUNet}: Cross-Iterative Alignment-Fusion Synergy for {HDR} Reconstruction via Deep Unfolding Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10666--10675},
}
DiT4SR: Taming Diffusion Transformer for Real-World Image Super-Resolution: Zheng-Peng Duan,

Jiawei Zhang,

Xin Jin,

Ziheng Zhang,

Zheng Xiong,

Dongqing Zou,

Jimmy S. Ren,

Chunle Guo,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Zheng-Peng and Zhang, Jiawei and Jin, Xin and Zhang, Ziheng and Xiong, Zheng and Zou, Dongqing and Ren, Jimmy S. and Guo, Chunle and Li, Chongyi},
  title     = {{DiT4SR}: Taming Diffusion Transformer for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18948--18958},
}
Balanced Sharpness-Aware Minimization for Imbalanced Regression: Yahao Liu,

Qin Wang,

Lixin Duan,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yahao and Wang, Qin and Duan, Lixin and Li, Wen},
  title     = {Balanced Sharpness-Aware Minimization for Imbalanced Regression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6242--6251},
}
Dynamic-DINO: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection: Yehao Lu,

Minghe Weng,

Zekang Xiao,

Rui Jiang,

Wei Su,

Guangcong Zheng,

Ping Lu,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yehao and Weng, Minghe and Xiao, Zekang and Jiang, Rui and Su, Wei and Zheng, Guangcong and Lu, Ping and Li, Xi},
  title     = {{Dynamic-DINO}: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20847--20856},
}
Video Individual Counting for Moving Drones: Yaowu Fan,

Jia Wan,

Tao Han,

Antoni B. Chan,

Andy J. Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Yaowu and Wan, Jia and Han, Tao and Chan, Antoni B. and Ma, Andy J.},
  title     = {Video Individual Counting for Moving Drones},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12284--12293},
}
BridgeDepth: Bridging Monocular and Stereo Reasoning with Latent Alignment: Tongfan Guan,

Jiaxin Guo,

Chen Wang,

Yun-Hui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Tongfan and Guo, Jiaxin and Wang, Chen and Liu, Yun-Hui},
  title     = {{BridgeDepth}: Bridging Monocular and Stereo Reasoning with Latent Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27681--27691},
}
SuperEvent: Cross-Modal Learning of Event-based Keypoint Detection for SLAM: Yannick Burkhardt,

Simon Schaefer,

Stefan Leutenegger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Burkhardt_2025_ICCV,
  author    = {Burkhardt, Yannick and Schaefer, Simon and Leutenegger, Stefan},
  title     = {{SuperEvent}: Cross-Modal Learning of Event-based Keypoint Detection for {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8918--8928},
}
ViLLa: Video Reasoning Segmentation with Large Language Model: Rongkun Zheng,

Lu Qi,

Xi Chen,

Yi Wang,

Kun Wang,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Rongkun and Qi, Lu and Chen, Xi and Wang, Yi and Wang, Kun and Zhao, Hengshuang},
  title     = {{ViLLa}: Video Reasoning Segmentation with Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23667--23677},
}
Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition: Yandan Wang,

Chenqi Guo,

Yinglong Ma,

Jiangyan Chen,

Yuan Gao,

Weiming Dong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yandan and Guo, Chenqi and Ma, Yinglong and Chen, Jiangyan and Gao, Yuan and Dong, Weiming},
  title     = {Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10162--10172},
}
PRVQL: Progressive Knowledge-guided Refinement for Robust Egocentric Visual Query Localization: Bing Fan,

Yunhe Feng,

Yapeng Tian,

James Chenhao Liang,

Yuewei Lin,

Yan Huang,

Heng Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Bing and Feng, Yunhe and Tian, Yapeng and Liang, James Chenhao and Lin, Yuewei and Huang, Yan and Fan, Heng},
  title     = {{PRVQL}: Progressive Knowledge-guided Refinement for Robust Egocentric Visual Query Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5156--5165},
}
ARIG: Autoregressive Interactive Head Generation for Real-time Conversations: Ying Guo,

Xi Liu,

Cheng Zhen,

Pengfei Yan,

Xiaoming Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ying and Liu, Xi and Zhen, Cheng and Yan, Pengfei and Wei, Xiaoming},
  title     = {{ARIG}: Autoregressive Interactive Head Generation for Real-time Conversations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12956--12965},
}
RARE: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning: Chengyu Zheng,

Jin Huang,

Honghua Chen,

Mingqiang Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Chengyu and Huang, Jin and Chen, Honghua and Wei, Mingqiang},
  title     = {{RARE}: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26549--26558},
}
Bayesian-Inspired Space-Time Superpixels: Kent Gauen,

Stanley Chan; [pdf] [supp]
[bibtex]
@InProceedings{Gauen_2025_ICCV,
  author    = {Gauen, Kent and Chan, Stanley},
  title     = {Bayesian-Inspired Space-Time Superpixels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5382--5391},
}
FEVER-OOD: Free Energy Vulnerability Elimination for Robust Out-of-Distribution Detection: Brian K.S. Isaac-Medina,

Mauricio Che,

Yona Falinie A. Gaus,

Samet Akcay,

Toby P. Breckon; [pdf] [supp]
[bibtex]
@InProceedings{Isaac-Medina_2025_ICCV,
  author    = {Isaac-Medina, Brian K. S. and Che, Mauricio and Gaus, Yona Falinie A. and Akcay, Samet and Breckon, Toby P.},
  title     = {{FEVER-OOD}: Free Energy Vulnerability Elimination for Robust Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4529--4538},
}
TRNAS: A Training-Free Robust Neural Architecture Search: Yeming Yang,

Qingling Zhu,

Jianping Luo,

Ka-Chun Wong,

Qiuzhen Lin,

Jianqiang Li; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yeming and Zhu, Qingling and Luo, Jianping and Wong, Ka-Chun and Lin, Qiuzhen and Li, Jianqiang},
  title     = {{TRNAS}: A Training-Free Robust Neural Architecture Search},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2336--2345},
}
Hierarchical Divide-and-Conquer Grouping for Classification Adaptation of Pre-Trained Models: Ziqian Lu,

Yunlong Yu,

Qinyue Tong,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Ziqian and Yu, Yunlong and Tong, Qinyue and Liu, Jun},
  title     = {Hierarchical Divide-and-Conquer Grouping for Classification Adaptation of Pre-Trained Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3575--3584},
}
Text-IRSTD: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes: Feng Huang,

Shuyuan Zheng,

Zhaobing Qiu,

Huanxian Liu,

Huanxin Bai,

Liqiong Chen; [pdf]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Feng and Zheng, Shuyuan and Qiu, Zhaobing and Liu, Huanxian and Bai, Huanxin and Chen, Liqiong},
  title     = {{Text-IRSTD}: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10635--10644},
}
IntrinsicControlNet: Cross-distribution Image Generation with Real and Unreal: Jiayuan Lu,

Rengan Xie,

Zixuan Xie,

Zhizhen Wu,

Dianbing Xi,

Qi Ye,

Rui Wang,

Hujun Bao,

Yuchi Huo; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiayuan and Xie, Rengan and Xie, Zixuan and Wu, Zhizhen and Xi, Dianbing and Ye, Qi and Wang, Rui and Bao, Hujun and Huo, Yuchi},
  title     = {{IntrinsicControlNet}: Cross-distribution Image Generation with Real and Unreal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27315--27325},
}
Hypergraph Clustering Network with Partial Attribute Imputation: Qianqian Wang,

Bowen Zhao,

Zhengming Ding,

Wei Feng,

Quanxue Gao; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qianqian and Zhao, Bowen and Ding, Zhengming and Feng, Wei and Gao, Quanxue},
  title     = {Hypergraph Clustering Network with Partial Attribute Imputation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2697--2706},
}
Flash-VStream: Efficient Real-Time Understanding for Long Video Streams: Haoji Zhang,

Yiqin Wang,

Yansong Tang,

Yong Liu,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Haoji and Wang, Yiqin and Tang, Yansong and Liu, Yong and Feng, Jiashi and Jin, Xiaojie},
  title     = {{Flash-VStream}: Efficient Real-Time Understanding for Long Video Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21059--21069},
}
RoboTron-Drive: All-in-One Large Multimodal Model for Autonomous Driving: Zhijian Huang,

Chengjian Feng,

Feng Yan,

Baihui Xiao,

Zequn Jie,

Yujie Zhong,

Xiaodan Liang,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Zhijian and Feng, Chengjian and Yan, Feng and Xiao, Baihui and Jie, Zequn and Zhong, Yujie and Liang, Xiaodan and Ma, Lin},
  title     = {{RoboTron-Drive}: All-in-One Large Multimodal Model for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8011--8021},
}
Dual-Process Image Generation: Grace Luo,

Jonathan Granskog,

Aleksander Holynski,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Grace and Granskog, Jonathan and Holynski, Aleksander and Darrell, Trevor},
  title     = {Dual-Process Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17972--17983},
}
GroundFlow: A Plug-in Module for Temporal Reasoning on 3D Point Cloud Sequential Grounding: Zijun Lin,

Shuting He,

Cheston Tan,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zijun and He, Shuting and Tan, Cheston and Wen, Bihan},
  title     = {{GroundFlow}: A Plug-in Module for Temporal Reasoning on {3D} Point Cloud Sequential Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28774--28784},
}
Long-Tailed Classification with Multi-Granularity Semantics: Yuting Liu,

Liu Yang,

Yu Wang; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuting and Yang, Liu and Wang, Yu},
  title     = {Long-Tailed Classification with Multi-Granularity Semantics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4285--4294},
}
DAA*: Deep Angular A Star for Image-based Path Planning: Zhiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhiwei},
  title     = {{DAA*}: Deep Angular {A Star} for Image-based Path Planning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25284--25293},
}
TrackVerse: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning: Yibing Wei,

Samuel Church,

Victor Suciu,

Jinhong Lin,

Cheng-En Wu,

Pedro Morgado; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yibing and Church, Samuel and Suciu, Victor and Lin, Jinhong and Wu, Cheng-En and Morgado, Pedro},
  title     = {{TrackVerse}: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11153--11163},
}
Perspective-Invariant 3D Object Detection: Ao Liang,

Lingdong Kong,

Dongyue Lu,

Youquan Liu,

Jian Fang,

Huaici Zhao,

Wei Tsang Ooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Ao and Kong, Lingdong and Lu, Dongyue and Liu, Youquan and Fang, Jian and Zhao, Huaici and Ooi, Wei Tsang},
  title     = {Perspective-Invariant {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27725--27738},
}
Long-LRM: Long-sequence Large Reconstruction Model for Wide-coverage Gaussian Splats: Chen Ziwen,

Hao Tan,

Kai Zhang,

Sai Bi,

Fujun Luan,

Yicong Hong,

Li Fuxin,

Zexiang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Ziwen_2025_ICCV,
  author    = {Ziwen, Chen and Tan, Hao and Zhang, Kai and Bi, Sai and Luan, Fujun and Hong, Yicong and Fuxin, Li and Xu, Zexiang},
  title     = {{Long-LRM}: Long-sequence Large Reconstruction Model for Wide-coverage {Gaussian} Splats},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4349--4359},
}
Unlearning the Noisy Correspondence Makes CLIP More Robust: Haochen Han,

Alex Jinpeng Wang,

Peijun Ye,

Fangming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Haochen and Wang, Alex Jinpeng and Ye, Peijun and Liu, Fangming},
  title     = {Unlearning the Noisy Correspondence Makes {CLIP} More Robust},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4518--4528},
}
HumanSAM: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly: Chang Liu,

Yunfan Ye,

Fan Zhang,

Qingyang Zhou,

Yuchuan Luo,

Zhiping Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chang and Ye, Yunfan and Zhang, Fan and Zhou, Qingyang and Luo, Yuchuan and Cai, Zhiping},
  title     = {{HumanSAM}: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14028--14038},
}
An Information-Theoretic Regularizer for Lossy Neural Image Compression: Yingwen Zhang,

Meng Wang,

Xihua Sheng,

Peilin Chen,

Junru Li,

Li Zhang,

Shiqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yingwen and Wang, Meng and Sheng, Xihua and Chen, Peilin and Li, Junru and Zhang, Li and Wang, Shiqi},
  title     = {An Information-Theoretic Regularizer for Lossy Neural Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15573--15582},
}
ReTracker: Exploring Image Matching for Robust Online Any Point Tracking: Dongli Tan,

Xingyi He,

Sida Peng,

Yiqing Gong,

Xing Zhu,

Jiaming Sun,

Ruizhen Hu,

Yujun Shen,

Hujun Bao,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Dongli and He, Xingyi and Peng, Sida and Gong, Yiqing and Zhu, Xing and Sun, Jiaming and Hu, Ruizhen and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei},
  title     = {{ReTracker}: Exploring Image Matching for Robust Online Any Point Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4306--4316},
}
DC-AE 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space: Junyu Chen,

Dongyun Zou,

Wenkun He,

Junsong Chen,

Enze Xie,

Song Han,

Han Cai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Junyu and Zou, Dongyun and He, Wenkun and Chen, Junsong and Xie, Enze and Han, Song and Cai, Han},
  title     = {{DC-AE} 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19628--19637},
}
V2M4: 4D Mesh Animation Reconstruction from a Single Monocular Video: Jianqi Chen,

Biao Zhang,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter},
  title     = {{V2M4}: {4D} Mesh Animation Reconstruction from a Single Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11643--11653},
}
MixA: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge: Sabbir Ahmed,

Jingtao Li,

Weiming Zhuang,

Chen Chen,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2025_ICCV,
  author    = {Ahmed, Sabbir and Li, Jingtao and Zhuang, Weiming and Chen, Chen and Lyu, Lingjuan},
  title     = {{MixA}: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21187--21196},
}
MUSE: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion: Fei Peng,

Junqiang Wu,

Yan Li,

Tingting Gao,

Di Zhang,

Huiyuan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Fei and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Fu, Huiyuan},
  title     = {{MUSE}: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15885--15895},
}
OpenSubstance: A High-quality Measured Dataset of Multi-View and -Lighting Images and Shapes: Fan Pei,

Jinchen Bai,

Xiang Feng,

Zoubin Bi,

Kun Zhou,

Hongzhi Wu; [pdf]
[bibtex]
@InProceedings{Pei_2025_ICCV,
  author    = {Pei, Fan and Bai, Jinchen and Feng, Xiang and Bi, Zoubin and Zhou, Kun and Wu, Hongzhi},
  title     = {{OpenSubstance}: A High-quality Measured Dataset of Multi-View and -Lighting Images and Shapes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5221--5231},
}
Overcoming Dual Drift for Continual Long-Tailed Visual Question Answering: Feifei Zhang,

Zhihao Wang,

Xi Zhang,

Changsheng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Feifei and Wang, Zhihao and Zhang, Xi and Xu, Changsheng},
  title     = {Overcoming Dual Drift for Continual Long-Tailed Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4413--4423},
}
LIRA: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance: Zhang Li,

Biao Yang,

Qiang Liu,

Shuo Zhang,

Zhiyin Ma,

Liang Yin,

Linger Deng,

Yabo Sun,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhang and Yang, Biao and Liu, Qiang and Zhang, Shuo and Ma, Zhiyin and Yin, Liang and Deng, Linger and Sun, Yabo and Liu, Yuliang and Bai, Xiang},
  title     = {{LIRA}: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24056--24067},
}
GARF: Learning Generalizable 3D Reassembly for Real-World Fractures: Sihang Li,

Zeyu Jiang,

Grace Chen,

Chenyang Xu,

Siqi Tan,

Xue Wang,

Irving Fang,

Kristof Zyskowski,

Shannon P. McPherron,

Radu Iovita,

Chen Feng,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Sihang and Jiang, Zeyu and Chen, Grace and Xu, Chenyang and Tan, Siqi and Wang, Xue and Fang, Irving and Zyskowski, Kristof and McPherron, Shannon P. and Iovita, Radu and Feng, Chen and Zhang, Jing},
  title     = {{GARF}: Learning Generalizable {3D} Reassembly for Real-World Fractures},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5711--5721},
}
Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining: Qi Fan,

Kaiqi Liu,

Nian Liu,

Hisham Cholakkal,

Rao Muhammad Anwer,

Wenbin Li,

Yang Gao; [pdf]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Qi and Liu, Kaiqi and Liu, Nian and Cholakkal, Hisham and Anwer, Rao Muhammad and Li, Wenbin and Gao, Yang},
  title     = {Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21429--21439},
}
Exploring the Adversarial Vulnerabilities of Vision-Language-Action Models in Robotics: Taowen Wang,

Cheng Han,

James Liang,

Wenhao Yang,

Dongfang Liu,

Luna Xinyu Zhang,

Qifan Wang,

Jiebo Luo,

Ruixiang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Taowen and Han, Cheng and Liang, James and Yang, Wenhao and Liu, Dongfang and Zhang, Luna Xinyu and Wang, Qifan and Luo, Jiebo and Tang, Ruixiang},
  title     = {Exploring the Adversarial Vulnerabilities of Vision-Language-Action Models in Robotics},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6948--6958},
}
Scaling Language-Free Visual Representation Learning: David Fan,

Shengbang Tong,

Jiachen Zhu,

Koustuv Sinha,

Zhuang Liu,

Xinlei Chen,

Michael Rabbat,

Nicolas Ballas,

Yann LeCun,

Amir Bar,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, David and Tong, Shengbang and Zhu, Jiachen and Sinha, Koustuv and Liu, Zhuang and Chen, Xinlei and Rabbat, Michael and Ballas, Nicolas and LeCun, Yann and Bar, Amir and Xie, Saining},
  title     = {Scaling Language-Free Visual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {370--382},
}
HUG: Hierarchical Urban Gaussian Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes: Mai Su,

Zhongtao Wang,

Huishan Au,

Yilong Li,

Xizhe Cao,

Chengwei Pan,

Yisong Chen,

Guoping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Mai and Wang, Zhongtao and Au, Huishan and Li, Yilong and Cao, Xizhe and Pan, Chengwei and Chen, Yisong and Wang, Guoping},
  title     = {{HUG}: Hierarchical Urban {Gaussian} Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28839--28848},
}
ESSENTIAL: Episodic and Semantic Memory Integration for Video Class-Incremental Learning: Jongseo Lee,

Kyungho Bae,

Kyle Min,

Gyeong-Moon Park,

Jinwoo Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jongseo and Bae, Kyungho and Min, Kyle and Park, Gyeong-Moon and Choi, Jinwoo},
  title     = {{ESSENTIAL}: Episodic and Semantic Memory Integration for Video Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17546--17556},
}
Move to Understand a 3D Scene: Bridging Visual Grounding and Exploration for Efficient and Versatile Embodied Navigation: Ziyu Zhu,

Xilin Wang,

Yixuan Li,

Zhuofan Zhang,

Xiaojian Ma,

Yixin Chen,

Baoxiong Jia,

Wei Liang,

Qian Yu,

Zhidong Deng,

Siyuan Huang,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Ziyu and Wang, Xilin and Li, Yixuan and Zhang, Zhuofan and Ma, Xiaojian and Chen, Yixin and Jia, Baoxiong and Liang, Wei and Yu, Qian and Deng, Zhidong and Huang, Siyuan and Li, Qing},
  title     = {Move to Understand a {3D} Scene: Bridging Visual Grounding and Exploration for Efficient and Versatile Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8120--8132},
}
Exploring The Visual Feature Space for Multimodal Neural Decoding: Weihao Xia,

Cengiz Oztireli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Weihao and Oztireli, Cengiz},
  title     = {Exploring The Visual Feature Space for Multimodal Neural Decoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4370--4379},
}
INS-MMBench: A Comprehensive Benchmark for Evaluating LVLMs' Performance in Insurance: Chenwei Lin,

Hanjia Lyu,

Xian Xu,

Jiebo Luo; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chenwei and Lyu, Hanjia and Xu, Xian and Luo, Jiebo},
  title     = {{INS-MMBench}: A Comprehensive Benchmark for Evaluating {LVLMs'} Performance in Insurance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9036--9047},
}
Continual Personalization for Diffusion Models: Yu-Chien Liao,

Jr-Jen Chen,

Chi-Pin Huang,

Ci-Siang Lin,

Meng-Lin Wu,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Yu-Chien and Chen, Jr-Jen and Huang, Chi-Pin and Lin, Ci-Siang and Wu, Meng-Lin and Wang, Yu-Chiang Frank},
  title     = {Continual Personalization for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15511--15520},
}
Creation-MMBench: Assessing Context-Aware Creative Intelligence in MLLMs: Xinyu Fang,

Zhijian Chen,

Kai Lan,

Lixin Ma,

Shengyuan Ding,

Yingji Liang,

Xiangyu Zhao,

Farong Wen,

Zicheng Zhang,

Guofeng Zhang,

Haodong Duan,

Kai Chen,

Dahua Lin; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xinyu and Chen, Zhijian and Lan, Kai and Ma, Lixin and Ding, Shengyuan and Liang, Yingji and Zhao, Xiangyu and Wen, Farong and Zhang, Zicheng and Zhang, Guofeng and Duan, Haodong and Chen, Kai and Lin, Dahua},
  title     = {{Creation-MMBench}: Assessing Context-Aware Creative Intelligence in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {447--456},
}
Seam360GS: Seamless 360° Gaussian Splatting from Real-World Omnidirectional Images: Changha Shin,

Woong Oh Cho,

Seon Joo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Changha and Cho, Woong Oh and Kim, Seon Joo},
  title     = {{Seam360GS}: Seamless 360{\textdegree} {Gaussian} Splatting from Real-World Omnidirectional Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28970--28979},
}
Boosting Adversarial Transferability via Residual Perturbation Attack: Jinjia Peng,

Zeze Tao,

Huibing Wang,

Meng Wang,

Yang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Jinjia and Tao, Zeze and Wang, Huibing and Wang, Meng and Wang, Yang},
  title     = {Boosting Adversarial Transferability via Residual Perturbation Attack},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1261--1270},
}
LDPose: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild: Jiaying Ying,

Heming Du,

Kaihao Zhang,

Lincheng Li,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Li, Lincheng and Yu, Xin},
  title     = {{LDPose}: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9865--9875},
}
Talking to DINO: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation: Luca Barsellotti,

Lorenzo Bianchi,

Nicola Messina,

Fabio Carrara,

Marcella Cornia,

Lorenzo Baraldi,

Fabrizio Falchi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barsellotti_2025_ICCV,
  author    = {Barsellotti, Luca and Bianchi, Lorenzo and Messina, Nicola and Carrara, Fabio and Cornia, Marcella and Baraldi, Lorenzo and Falchi, Fabrizio and Cucchiara, Rita},
  title     = {Talking to {DINO}: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22025--22035},
}
Language-Driven Multi-Label Zero-Shot Learning with Semantic Granularity: Shouwen Wang,

Qian Wan,

Junbin Gao,

Zhigang Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shouwen and Wan, Qian and Gao, Junbin and Zeng, Zhigang},
  title     = {Language-Driven Multi-Label Zero-Shot Learning with Semantic Granularity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1968--1978},
}
Beyond Perspective: Neural 360-Degree Video Compression: Andy Regensky,

Marc Windsheimer,

Fabian Brand,

Andre Kaup; [pdf] [supp]
[bibtex]
@InProceedings{Regensky_2025_ICCV,
  author    = {Regensky, Andy and Windsheimer, Marc and Brand, Fabian and Kaup, Andre},
  title     = {Beyond Perspective: Neural 360-Degree Video Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16143--16153},
}
Differentiable Room Acoustic Rendering with Multi-View Vision Priors: Derong Jin,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Derong and Gao, Ruohan},
  title     = {Differentiable Room Acoustic Rendering with Multi-View Vision Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {37--47},
}
An Inversion-based Measure of Memorization for Diffusion Models: Zhe Ma,

Qingming Li,

Xuhong Zhang,

Tianyu Du,

Ruixiao Lin,

Zonghui Wang,

Shouling Ji,

Wenzhi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Zhe and Li, Qingming and Zhang, Xuhong and Du, Tianyu and Lin, Ruixiao and Wang, Zonghui and Ji, Shouling and Chen, Wenzhi},
  title     = {An Inversion-based Measure of Memorization for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16959--16969},
}
AgroBench: Vision-Language Model Benchmark in Agriculture: Risa Shinoda,

Nakamasa Inoue,

Hirokatsu Kataoka,

Masaki Onishi,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinoda_2025_ICCV,
  author    = {Shinoda, Risa and Inoue, Nakamasa and Kataoka, Hirokatsu and Onishi, Masaki and Ushiku, Yoshitaka},
  title     = {{AgroBench}: Vision-Language Model Benchmark in Agriculture},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7634--7644},
}
ROAR: Reducing Inversion Error in Generative Image Watermarking: Hanyi Wang,

Han Fang,

Shi-Lin Wang,

Ee-Chien Chang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hanyi and Fang, Han and Wang, Shi-Lin and Chang, Ee-Chien},
  title     = {{ROAR}: Reducing Inversion Error in Generative Image Watermarking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19742--19751},
}
Adding Additional Control to One-Step Diffusion with Joint Distribution Matching: Yihong Luo,

Tianyang Hu,

Yifan Song,

Jiacheng Sun,

Zhenguo Li,

Jing Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yihong and Hu, Tianyang and Song, Yifan and Sun, Jiacheng and Li, Zhenguo and Tang, Jing},
  title     = {Adding Additional Control to One-Step Diffusion with Joint Distribution Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4009--4018},
}
TimeBooth: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging: Zepeng Su,

Zhulin Liu,

Zongyan Zhang,

Tong Zhang,

C. L. Philip Chen; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Zepeng and Liu, Zhulin and Zhang, Zongyan and Zhang, Tong and Chen, C. L. Philip},
  title     = {{TimeBooth}: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12147--12157},
}
Flexi-FSCIL: Adaptive Knowledge Retention for Breaking the Stability-Plasticity Dilemma in Few-Shot Class-Incremental Learning: Wufei Xie,

Yalin Wang,

Chenliang Liu,

Zhaohui Jiang,

Xue Yang; [pdf]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Wufei and Wang, Yalin and Liu, Chenliang and Jiang, Zhaohui and Yang, Xue},
  title     = {{Flexi-FSCIL}: Adaptive Knowledge Retention for Breaking the Stability-Plasticity Dilemma in Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2451--2460},
}
Learning Beyond Still Frames: Scaling Vision-Language Models with Video: Yiyuan Zhang,

Handong Li,

Jing Liu,

Xiangyu Yue; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu},
  title     = {Learning Beyond Still Frames: Scaling Vision-Language Models with Video},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22425--22435},
}
MultiADS: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning: Ylli Sadikaj,

Hongkuan Zhou,

Lavdim Halilaj,

Stefan Schmid,

Steffen Staab,

Claudia Plant; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sadikaj_2025_ICCV,
  author    = {Sadikaj, Ylli and Zhou, Hongkuan and Halilaj, Lavdim and Schmid, Stefan and Staab, Steffen and Plant, Claudia},
  title     = {{MultiADS}: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22978--22988},
}
SEGS-SLAM: Structure-enhanced 3D Gaussian Splatting SLAM with Appearance Embedding: Tianci Wen,

Zhiang Liu,

Yongchun Fang; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Tianci and Liu, Zhiang and Fang, Yongchun},
  title     = {{SEGS-SLAM}: Structure-enhanced {3D} {Gaussian} Splatting {SLAM} with Appearance Embedding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28103--28113},
}
Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation: Fan Li,

Xuanbin Wang,

Xuan Wang,

Zhaoxiang Zhang,

Yuelei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Fan and Wang, Xuanbin and Wang, Xuan and Zhang, Zhaoxiang and Xu, Yuelei},
  title     = {Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24255--24265},
}
Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for Gaussian Splatting: Xiaoyu Zhang,

Weihong Pan,

Xiaojun Xiang,

Hongjia Zhai,

Liyang Zhou,

Hanqing Jiang,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiaoyu and Pan, Weihong and Xiang, Xiaojun and Zhai, Hongjia and Zhou, Liyang and Jiang, Hanqing and Zhang, Guofeng},
  title     = {Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26923--26932},
}
How To Make Your Cell Tracker Say "I dunno!": Richard D. Paul,

Johannes Seiffarth,

David Rügamer,

Katharina Nöh,

Hanno Scharr; [pdf] [supp]
[bibtex]
@InProceedings{Paul_2025_ICCV,
  author    = {Paul, Richard D. and Seiffarth, Johannes and R{\"u}gamer, David and N{\"o}h, Katharina and Scharr, Hanno},
  title     = {How To Make Your Cell Tracker Say ``{I} dunno!''},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6914--6923},
}
Training-free and Adaptive Sparse Attention for Efficient Long Video Generation: Yifei Xia,

Suhan Ling,

Fangcheng Fu,

Yujie Wang,

Huixia Li,

Xuefeng Xiao,

Bin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Yifei and Ling, Suhan and Fu, Fangcheng and Wang, Yujie and Li, Huixia and Xiao, Xuefeng and Cui, Bin},
  title     = {Training-free and Adaptive Sparse Attention for Efficient Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15982--15993},
}
Super Resolved Imaging with Adaptive Optics: Robin Swanson,

Esther Y. H. Lin,

Masen Lamb,

Suresh Sivanandam,

Kiriakos N. Kutulakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Swanson_2025_ICCV,
  author    = {Swanson, Robin and Lin, Esther Y. H. and Lamb, Masen and Sivanandam, Suresh and Kutulakos, Kiriakos N.},
  title     = {Super Resolved Imaging with Adaptive Optics},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29142--29152},
}
BillBoard Splatting (BBSplat): Learnable Textured Primitives for Novel View Synthesis: David Svitov,

Pietro Morerio,

Lourdes Agapito,

Alessio Del Bue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Svitov_2025_ICCV,
  author    = {Svitov, David and Morerio, Pietro and Agapito, Lourdes and Del Bue, Alessio},
  title     = {{BillBoard} Splatting ({BBSplat}): Learnable Textured Primitives for Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25029--25039},
}
SDFit: 3D Object Pose and Shape by Fitting a Morphable SDF to a Single Image: Dimitrije Antić,

Georgios Paschalidis,

Shashank Tripathi,

Theo Gevers,

Sai Kumar Dwivedi,

Dimitrios Tzionas; [pdf] [supp]
[bibtex]
@InProceedings{Antic_2025_ICCV,
  author    = {Anti{\'c}, Dimitrije and Paschalidis, Georgios and Tripathi, Shashank and Gevers, Theo and Dwivedi, Sai Kumar and Tzionas, Dimitrios},
  title     = {{SDFit}: {3D} Object Pose and Shape by Fitting a Morphable {SDF} to a Single Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9616--9626},
}
VRM: Knowledge Distillation via Virtual Relation Matching: Weijia Zhang,

Fei Xie,

Weidong Cai,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weijia and Xie, Fei and Cai, Weidong and Ma, Chao},
  title     = {{VRM}: Knowledge Distillation via Virtual Relation Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2707--2717},
}
SC-Lane: Slope-aware and Consistent Road Height Estimation Framework for 3D Lane Detection: Chaesong Park,

Eunbin Seo,

Jihyeon Hwang,

Jongwoo Lim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Chaesong and Seo, Eunbin and Hwang, Jihyeon and Lim, Jongwoo},
  title     = {{SC-Lane}: Slope-aware and Consistent Road Height Estimation Framework for {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28407--28416},
}
Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models: Haoming Cai,

Tsung-Wei Huang,

Shiv Gehlot,

Brandon Y. Feng,

Sachin Shah,

Guan-Ming Su,

Christopher Metzler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Haoming and Huang, Tsung-Wei and Gehlot, Shiv and Feng, Brandon Y. and Shah, Sachin and Su, Guan-Ming and Metzler, Christopher},
  title     = {Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18207--18217},
}
ZIUM: Zero-Shot Intent-Aware Adversarial Attack on Unlearned Models: Hyun Jun Yook,

Ga San Jhun,

Jae Hyun Cho,

Min Jeon,

Donghyun Kim,

Tae Hyung Kim,

Youn Kyu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yook_2025_ICCV,
  author    = {Yook, Hyun Jun and Jhun, Ga San and Cho, Jae Hyun and Jeon, Min and Kim, Donghyun and Kim, Tae Hyung and Lee, Youn Kyu},
  title     = {{ZIUM}: Zero-Shot Intent-Aware Adversarial Attack on Unlearned Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3926--3935},
}
Beyond Next-Token: Next-X Prediction for Autoregressive Visual Generation: Sucheng Ren,

Qihang Yu,

Ju He,

Xiaohui Shen,

Alan Yuille,

Liang-Chieh Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh},
  title     = {Beyond Next-Token: {Next-X} Prediction for Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15781--15791},
}
GECO: Geometrically Consistent Embedding with Lightspeed Inference: Regine Hartwig,

Dominik Muhle,

Riccardo Marin,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hartwig_2025_ICCV,
  author    = {Hartwig, Regine and Muhle, Dominik and Marin, Riccardo and Cremers, Daniel},
  title     = {{GECO}: Geometrically Consistent Embedding with Lightspeed Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9309--9319},
}
Marigold-DC: Zero-Shot Monocular Depth Completion with Guided Diffusion: Massimiliano Viola,

Kevin Qu,

Nando Metzger,

Bingxin Ke,

Alexander Becker,

Konrad Schindler,

Anton Obukhov; [pdf]
[bibtex]
@InProceedings{Viola_2025_ICCV,
  author    = {Viola, Massimiliano and Qu, Kevin and Metzger, Nando and Ke, Bingxin and Becker, Alexander and Schindler, Konrad and Obukhov, Anton},
  title     = {{Marigold-DC}: Zero-Shot Monocular Depth Completion with Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5359--5370},
}
MedSegFactory: Text-Guided Generation of Medical Image-Mask Pairs: Jiawei Mao,

Yuhan Wang,

Yucheng Tang,

Daguang Xu,

Kang Wang,

Yang Yang,

Zongwei Zhou,

Yuyin Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Jiawei and Wang, Yuhan and Tang, Yucheng and Xu, Daguang and Wang, Kang and Yang, Yang and Zhou, Zongwei and Zhou, Yuyin},
  title     = {{MedSegFactory}: Text-Guided Generation of Medical Image-Mask Pairs},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21525--21535},
}
TurboTrain: Towards Efficient and Balanced Multi-Task Learning for Multi-Agent Perception and Prediction: Zewei Zhou,

Seth Z. Zhao,

Tianhui Cai,

Zhiyu Huang,

Bolei Zhou,

Jiaqi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zewei and Zhao, Seth Z. and Cai, Tianhui and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi},
  title     = {{TurboTrain}: Towards Efficient and Balanced Multi-Task Learning for Multi-Agent Perception and Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4391--4402},
}
FiVE-Bench: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models: Minghan Li,

Chenxi Xie,

Yichen Wu,

Lei Zhang,

Mengyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Minghan and Xie, Chenxi and Wu, Yichen and Zhang, Lei and Wang, Mengyu},
  title     = {{FiVE-Bench}: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16672--16681},
}
ReAL-AD: Towards Human-Like Reasoning in End-to-End Autonomous Driving: Yuhang Lu,

Jiadong Tu,

Yuexin Ma,

Xinge Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yuhang and Tu, Jiadong and Ma, Yuexin and Zhu, Xinge},
  title     = {{ReAL-AD}: Towards Human-Like Reasoning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27783--27793},
}
ReME: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation: Xiwei Xuan,

Ziquan Deng,

Kwan-Liu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xuan_2025_ICCV,
  author    = {Xuan, Xiwei and Deng, Ziquan and Ma, Kwan-Liu},
  title     = {{ReME}: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20954--20965},
}
Sparse Fine-Tuning of Transformers for Generative Tasks: Wei Chen,

Jingxi Yu,

Zichen Miao,

Qiang Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Wei and Yu, Jingxi and Miao, Zichen and Qiu, Qiang},
  title     = {Sparse Fine-Tuning of Transformers for Generative Tasks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18703--18713},
}
Unlocking Constraints: Source-Free Occlusion-Aware Seamless Segmentation: Yihong Cao,

Jiaming Zhang,

Xu Zheng,

Hao Shi,

Kunyu Peng,

Hang Liu,

Kailun Yang,

Hui Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Yihong and Zhang, Jiaming and Zheng, Xu and Shi, Hao and Peng, Kunyu and Liu, Hang and Yang, Kailun and Zhang, Hui},
  title     = {Unlocking Constraints: Source-Free Occlusion-Aware Seamless Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8961--8972},
}
A Visual Leap in CLIP Compositionality Reasoning through Generation of Counterfactual Sets: Zexi Jia,

Chuanwei Huang,

Hongyan Fei,

Yeshuang Zhu,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Zexi and Huang, Chuanwei and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {A Visual Leap in {CLIP} Compositionality Reasoning through Generation of Counterfactual Sets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23498--23507},
}
Boosting Adversarial Transferability via Negative Hessian Trace Regularization: Yunfei Long,

Zilin Tian,

Liguo Zhang,

Huosheng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Long_2025_ICCV,
  author    = {Long, Yunfei and Tian, Zilin and Zhang, Liguo and Xu, Huosheng},
  title     = {Boosting Adversarial Transferability via Negative {Hessian} Trace Regularization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2386--2395},
}
AutoOcc: Automatic Open-Ended Semantic Occupancy Annotation via Vision-Language Guided Gaussian Splatting: Xiaoyu Zhou,

Jingqi Wang,

Yongtao Wang,

Yufei Wei,

Nan Dong,

Ming-Hsuan Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Xiaoyu and Wang, Jingqi and Wang, Yongtao and Wei, Yufei and Dong, Nan and Yang, Ming-Hsuan},
  title     = {{AutoOcc}: Automatic Open-Ended Semantic Occupancy Annotation via Vision-Language Guided {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3367--3377},
}
Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models: Jianwei Fei,

Yunshu Dai,

Peipeng Yu,

Zhe Kong,

Jiantao Zhou,

Zhihua Xia; [pdf]
[bibtex]
@InProceedings{Fei_2025_ICCV,
  author    = {Fei, Jianwei and Dai, Yunshu and Yu, Peipeng and Kong, Zhe and Zhou, Jiantao and Xia, Zhihua},
  title     = {Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15025--15034},
}
Improving Noise Efficiency in Privacy-preserving Dataset Distillation: Runkai Zheng,

Vishnu Asutosh Dasu,

Yinong Oliver Wang,

Haohan Wang,

Fernando De La Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Runkai and Dasu, Vishnu Asutosh and Wang, Yinong Oliver and Wang, Haohan and De La Torre, Fernando},
  title     = {Improving Noise Efficiency in Privacy-preserving Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4838--4847},
}
Parameter-Efficient Adaptation of Geospatial Foundation Models through Embedding Deflection: Romain Thoreau,

Valerio Marsocci,

Dawa Derksen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thoreau_2025_ICCV,
  author    = {Thoreau, Romain and Marsocci, Valerio and Derksen, Dawa},
  title     = {Parameter-Efficient Adaptation of Geospatial Foundation Models through Embedding Deflection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9594--9604}
}
UniFuse: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments: Dayong Su,

Yafei Zhang,

Huafeng Li,

Jinxing Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Dayong and Zhang, Yafei and Li, Huafeng and Li, Jinxing and Liu, Yu},
  title     = {{UniFuse}: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14238--14247}
}
Coupling the Generator with Teacher for Effective Data-Free Knowledge Distillation: Xu Chen,

Yang Li,

Yahong Han,

Guangquan Xu,

Jialie Shen; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xu and Li, Yang and Han, Yahong and Xu, Guangquan and Shen, Jialie},
  title     = {Coupling the Generator with Teacher for Effective Data-Free Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2152--2160}
}
MOSCATO: Predicting Multiple Object State Change Through Actions: Parnian Zameni,

Yuhan Shen,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Zameni_2025_ICCV,
  author    = {Zameni, Parnian and Shen, Yuhan and Elhamifar, Ehsan},
  title     = {{MOSCATO}: Predicting Multiple Object State Change Through Actions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11600--11611}
}
Harnessing Input-Adaptive Inference for Efficient VLN: Dongwoo Kang,

Akhil Perincherry,

Zachary Coalson,

Aiden Gabriel,

Stefan Lee,

Sanghyun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Dongwoo and Perincherry, Akhil and Coalson, Zachary and Gabriel, Aiden and Lee, Stefan and Hong, Sanghyun},
  title     = {Harnessing Input-Adaptive Inference for Efficient {VLN}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8219--8229}
}
LoftUp: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models: Haiwen Huang,

Anpei Chen,

Volodymyr Havrylov,

Andreas Geiger,

Dan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Haiwen and Chen, Anpei and Havrylov, Volodymyr and Geiger, Andreas and Zhang, Dan},
  title     = {{LoftUp}: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9913--9923}
}
EMatch: A Unified Framework for Event-based Optical Flow and Stereo Matching: Pengjie Zhang,

Lin Zhu,

Xiao Wang,

Lizhi Wang,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Pengjie and Zhu, Lin and Wang, Xiao and Wang, Lizhi and Huang, Hua},
  title     = {{EMatch}: A Unified Framework for Event-based Optical Flow and Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5845--5855}
}
Superpowering Open-Vocabulary Object Detectors for X-ray Vision: Pablo Garcia-Fernandez,

Lorenzo Vaquero,

Mingxuan Liu,

Feng Xue,

Daniel Cores,

Nicu Sebe,

Manuel Mucientes,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Garcia-Fernandez_2025_ICCV,
  author    = {Garcia-Fernandez, Pablo and Vaquero, Lorenzo and Liu, Mingxuan and Xue, Feng and Cores, Daniel and Sebe, Nicu and Mucientes, Manuel and Ricci, Elisa},
  title     = {Superpowering Open-Vocabulary Object Detectors for {X-ray} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20770--20779}
}
LVBench: An Extreme Long Video Understanding Benchmark: Weihan Wang,

Zehai He,

Wenyi Hong,

Yean Cheng,

Xiaohan Zhang,

Ji Qi,

Ming Ding,

Xiaotao Gu,

Shiyu Huang,

Bin Xu,

Yuxiao Dong,

Jie Tang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weihan and He, Zehai and Hong, Wenyi and Cheng, Yean and Zhang, Xiaohan and Qi, Ji and Ding, Ming and Gu, Xiaotao and Huang, Shiyu and Xu, Bin and Dong, Yuxiao and Tang, Jie},
  title     = {{LVBench}: An Extreme Long Video Understanding Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22958--22967}
}
Scalable Image Tokenization with Index Backpropagation Quantization: Fengyuan Shi,

Zhuoyan Luo,

Yixiao Ge,

Yujiu Yang,

Ying Shan,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Fengyuan and Luo, Zhuoyan and Ge, Yixiao and Yang, Yujiu and Shan, Ying and Wang, Limin},
  title     = {Scalable Image Tokenization with Index Backpropagation Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16037--16046}
}
InsViE-1M: Effective Instruction-based Video Editing with Elaborate Dataset Construction: Yuhui Wu,

Liyi Chen,

Ruibin Li,

Shihao Wang,

Chenxi Xie,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yuhui and Chen, Liyi and Li, Ruibin and Wang, Shihao and Xie, Chenxi and Zhang, Lei},
  title     = {{InsViE-1M}: Effective Instruction-based Video Editing with Elaborate Dataset Construction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16692--16701}
}
SpatialCrafter: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations: Songchun Zhang,

Huiyao Xu,

Sitong Guo,

Zhongwei Xie,

Hujun Bao,

Weiwei Xu,

Changqing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Songchun and Xu, Huiyao and Guo, Sitong and Xie, Zhongwei and Bao, Hujun and Xu, Weiwei and Zou, Changqing},
  title     = {{SpatialCrafter}: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27794--27805}
}
Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video LLMs: Jeongseok Hyun,

Sukjun Hwang,

Su Ho Han,

Taeoh Kim,

Inwoong Lee,

Dongyoon Wee,

Joon-Young Lee,

Seon Joo Kim,

Minho Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hyun_2025_ICCV,
  author    = {Hyun, Jeongseok and Hwang, Sukjun and Han, Su Ho and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Lee, Joon-Young and Kim, Seon Joo and Shim, Minho},
  title     = {Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23990--24000}
}
ONLY: One-Layer Intervention Sufficiently Mitigates Hallucinations in Large Vision-Language Models: Zifu Wan,

Ce Zhang,

Silong Yong,

Martin Q. Ma,

Simon Stepputtis,

Louis-Philippe Morency,

Deva Ramanan,

Katia Sycara,

Yaqi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Zifu and Zhang, Ce and Yong, Silong and Ma, Martin Q. and Stepputtis, Simon and Morency, Louis-Philippe and Ramanan, Deva and Sycara, Katia and Xie, Yaqi},
  title     = {{ONLY}: One-Layer Intervention Sufficiently Mitigates Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3225--3234}
}
Inverse Image-Based Rendering for Light Field Generation from Single Images: Hyunjun Jung,

Hae-Gon Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Hyunjun and Jeon, Hae-Gon},
  title     = {Inverse Image-Based Rendering for Light Field Generation from Single Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24739--24749}
}
Morph: A Motion-free Physics Optimization Framework for Human Motion Generation: Zhuo Li,

Mingshuang Luo,

Ruibing Hou,

Xin Zhao,

Hao Liu,

Hong Chang,

Zimo Liu,

Chen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhuo and Luo, Mingshuang and Hou, Ruibing and Zhao, Xin and Liu, Hao and Chang, Hong and Liu, Zimo and Li, Chen},
  title     = {Morph: A Motion-free Physics Optimization Framework for Human Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14580--14589}
}
Generalized and Efficient 2D Gaussian Splatting for Arbitrary-scale Super-Resolution: Du Chen,

Liyi Chen,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Du and Chen, Liyi and Zhang, Zhengqiang and Zhang, Lei},
  title     = {Generalized and Efficient {2D} {Gaussian} Splatting for Arbitrary-scale Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26435--26445}
}
Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception: Hongwei Lin,

Dongyu Pan,

Qiming Xia,

Hai Wu,

Cheng Wang,

Siqi Shen,

Chenglu Wen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Hongwei and Pan, Dongyu and Xia, Qiming and Wu, Hai and Wang, Cheng and Shen, Siqi and Wen, Chenglu},
  title     = {Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19947--19956}
}
ProbMED: A Probabilistic Framework for Medical Multimodal Binding: Yuan Gao,

Sangwook Kim,

Jianzhong You,

Chris McIntosh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Yuan and Kim, Sangwook and You, Jianzhong and McIntosh, Chris},
  title     = {{ProbMED}: A Probabilistic Framework for Medical Multimodal Binding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20157--20167}
}
Kestrel: 3D Multimodal LLM for Part-Aware Grounded Description: Mahmoud Ahmed,

Junjie Fei,

Jian Ding,

Eslam Mohamed Bakr,

Mohamed Elhoseiny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahmed_2025_ICCV,
  author    = {Ahmed, Mahmoud and Fei, Junjie and Ding, Jian and Bakr, Eslam Mohamed and Elhoseiny, Mohamed},
  title     = {Kestrel: {3D} Multimodal {LLM} for Part-Aware Grounded Description},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8973--8983}
}
Precise Action-to-Video Generation Through Visual Action Prompts: Yuang Wang,

Chao Wen,

Haoyu Guo,

Sida Peng,

Minghan Qin,

Hujun Bao,

Xiaowei Zhou,

Ruizhen Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuang and Wen, Chao and Guo, Haoyu and Peng, Sida and Qin, Minghan and Bao, Hujun and Zhou, Xiaowei and Hu, Ruizhen},
  title     = {Precise Action-to-Video Generation Through Visual Action Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12713--12724}
}
QuickSplat: Fast 3D Surface Reconstruction via Learned Gaussian Initialization: Yueh-Cheng Liu,

Lukas Höllein,

Matthias Nießner,

Angela Dai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yueh-Cheng and H{\"o}llein, Lukas and Nie{\ss}ner, Matthias and Dai, Angela},
  title     = {{QuickSplat}: Fast {3D} Surface Reconstruction via Learned {Gaussian} Initialization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27851--27861}
}
EquiCaps: Predictor-Free Pose-Aware Pre-Trained Capsule Networks: Athinoulla Konstantinou,

Georgios Leontidis,

Mamatha Thota,

Aiden Durrant; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Konstantinou_2025_ICCV,
  author    = {Konstantinou, Athinoulla and Leontidis, Georgios and Thota, Mamatha and Durrant, Aiden},
  title     = {{EquiCaps}: Predictor-Free Pose-Aware Pre-Trained Capsule Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7947--7957}
}
Feature Decomposition-Recomposition in Large Vision-Language Model for Few-Shot Class-Incremental Learning: Zongyao Xue,

Meina Kan,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Zongyao and Kan, Meina and Shan, Shiguang and Chen, Xilin},
  title     = {Feature Decomposition-Recomposition in Large Vision-Language Model for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3153--3162}
}
Taming Flow Matching with Unbalanced Optimal Transport into Fast Pansharpening: Zihan Cao,

Yu Zhong,

Liang-Jian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Zihan and Zhong, Yu and Deng, Liang-Jian},
  title     = {Taming Flow Matching with Unbalanced Optimal Transport into Fast Pansharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2803--2813}
}
Learning to Unlearn while Retaining: Combating Gradient Conflicts in Machine Unlearning: Gaurav Patel,

Qiang Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Gaurav and Qiu, Qiang},
  title     = {Learning to Unlearn while Retaining: Combating Gradient Conflicts in Machine Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4211--4221}
}
Scaling and Taming Adversarial Training with Synthetic Data: Juntao Wu,

Xianting Huang,

Yu Chen,

Shuai Pang,

Ke Wang; [pdf]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Juntao and Huang, Xianting and Chen, Yu and Pang, Shuai and Wang, Ke},
  title     = {Scaling and Taming Adversarial Training with Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2951--2960}
}
FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors: Yabo Zhang,

Xinpeng Zhou,

Yihan Zeng,

Hang Xu,

Hui Li,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yabo and Zhou, Xinpeng and Zeng, Yihan and Xu, Hang and Li, Hui and Zuo, Wangmeng},
  title     = {{FramePainter}: Endowing Interactive Image Editing with Video Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18121--18131}
}
MetaScope: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy: Wuyang Li,

Wentao Pan,

Xiaoyuan Liu,

Zhendong Luo,

Chenxin Li,

Hengyu Liu,

Din Ping Tsai,

Mu Ku Chen,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Wuyang and Pan, Wentao and Liu, Xiaoyuan and Luo, Zhendong and Li, Chenxin and Liu, Hengyu and Tsai, Din Ping and Chen, Mu Ku and Yuan, Yixuan},
  title     = {{MetaScope}: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25938--25950}
}
Removing Cost Volumes from Optical Flow Estimators: Simon Kiefhaber,

Stefan Roth,

Simone Schaub-Meyer; [pdf] [supp]
[bibtex]
@InProceedings{Kiefhaber_2025_ICCV,
  author    = {Kiefhaber, Simon and Roth, Stefan and Schaub-Meyer, Simone},
  title     = {Removing Cost Volumes from Optical Flow Estimators},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {79--89}
}
SparseRecon: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies: Liang Han,

Xu Zhang,

Haichuan Song,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Liang and Zhang, Xu and Song, Haichuan and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{SparseRecon}: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28514--28524}
}
LMM4LMM: Benchmarking and Evaluating Large-multimodal Image Generation with LMMs: Jiarui Wang,

Huiyu Duan,

Yu Zhao,

Juntong Wang,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiarui and Duan, Huiyu and Zhao, Yu and Wang, Juntong and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{LMM4LMM}: Benchmarking and Evaluating Large-multimodal Image Generation with {LMMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17312--17323}
}
Leveraging the Power of MLLMs for Gloss-Free Sign Language Translation: Jungeun Kim,

Hyeongwoo Jeon,

Jongseong Bae,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jungeun and Jeon, Hyeongwoo and Bae, Jongseong and Kim, Ha Young},
  title     = {Leveraging the Power of {MLLMs} for Gloss-Free Sign Language Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21048--21058}
}
Cross-modal Ship Re-Identification via Optical and SAR Imagery: A Novel Dataset and Method: Han Wang,

Shengyang Li,

Jian Yang,

Yuxuan Liu,

Yixuan Lv,

Zhuang Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Han and Li, Shengyang and Yang, Jian and Liu, Yuxuan and Lv, Yixuan and Zhou, Zhuang},
  title     = {Cross-modal Ship Re-Identification via Optical and {SAR} Imagery: A Novel Dataset and Method},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7873--7883}
}
Hybrid-grained Feature Aggregation with Coarse-to-fine Language Guidance for Self-supervised Monocular Depth Estimation: Wenyao Zhang,

Hongsi Liu,

Bohan Li,

Jiawei He,

Zekun Qi,

Yunnan Wang,

Shengyang Zhao,

Xinqiang Yu,

Wenjun Zeng,

Xin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenyao and Liu, Hongsi and Li, Bohan and He, Jiawei and Qi, Zekun and Wang, Yunnan and Zhao, Shengyang and Yu, Xinqiang and Zeng, Wenjun and Jin, Xin},
  title     = {Hybrid-grained Feature Aggregation with Coarse-to-fine Language Guidance for Self-supervised Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6678--6692}
}
GaussianSpeech: Audio-Driven Personalized 3D Gaussian Avatars: Shivangi Aneja,

Artem Sevastopolsky,

Tobias Kirschstein,

Justus Thies,

Angela Dai,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Aneja_2025_ICCV,
  author    = {Aneja, Shivangi and Sevastopolsky, Artem and Kirschstein, Tobias and Thies, Justus and Dai, Angela and Nie{\ss}ner, Matthias},
  title     = {{GaussianSpeech}: Audio-Driven Personalized {3D} {Gaussian} Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13065--13075}
}
MonoFusion: Sparse-View 4D Reconstruction via Monocular Fusion: Zihan Wang,

Jeff Tan,

Tarasha Khurana,

Neehar Peri,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zihan and Tan, Jeff and Khurana, Tarasha and Peri, Neehar and Ramanan, Deva},
  title     = {{MonoFusion}: Sparse-View {4D} Reconstruction via Monocular Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8252--8263}
}
SIGMAN: Scaling 3D Human Gaussian Generation with Millions of Assets: Yuhang Yang,

Fengqi Liu,

Yixing Lu,

Qin Zhao,

Pingyu Wu,

Wei Zhai,

Ran Yi,

Yang Cao,

Lizhuang Ma,

Zheng-Jun Zha,

Junting Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuhang and Liu, Fengqi and Lu, Yixing and Zhao, Qin and Wu, Pingyu and Zhai, Wei and Yi, Ran and Cao, Yang and Ma, Lizhuang and Zha, Zheng-Jun and Dong, Junting},
  title     = {{SIGMAN}: Scaling {3D} Human {Gaussian} Generation with Millions of Assets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5122--5133}
}
Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets: Dale Decatur,

Thibault Groueix,

Wang Yifan,

Rana Hanocka,

Vladimir Kim,

Matheus Gadelha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Decatur_2025_ICCV,
  author    = {Decatur, Dale and Groueix, Thibault and Yifan, Wang and Hanocka, Rana and Kim, Vladimir and Gadelha, Matheus},
  title     = {Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16482--16491}
}
Dita: Scaling Diffusion Transformer for Generalist Vision-Language-Action Policy: Zhi Hou,

Tianyi Zhang,

Yuwen Xiong,

Haonan Duan,

Hengjun Pu,

Ronglei Tong,

Chengyang Zhao,

Xizhou Zhu,

Yu Qiao,

Jifeng Dai,

Yuntao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Zhi and Zhang, Tianyi and Xiong, Yuwen and Duan, Haonan and Pu, Hengjun and Tong, Ronglei and Zhao, Chengyang and Zhu, Xizhou and Qiao, Yu and Dai, Jifeng and Chen, Yuntao},
  title     = {Dita: Scaling Diffusion Transformer for Generalist Vision-Language-Action Policy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7686--7697}
}
Tree-NeRV: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids: Jiancheng Zhao,

Yifan Zhan,

Qingtian Zhu,

Mingze Ma,

Muyao Niu,

Zunian Wan,

Xiang Ji,

Yinqiang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Jiancheng and Zhan, Yifan and Zhu, Qingtian and Ma, Mingze and Niu, Muyao and Wan, Zunian and Ji, Xiang and Zheng, Yinqiang},
  title     = {{Tree-NeRV}: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15076--15085}
}
GLEAM: Enhanced Transferable Adversarial Attacks for Vision-Language Pre-training Models via Global-Local Transformations: Yunqi Liu,

Xue Ouyang,

Xiaohui Cui; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yunqi and Ouyang, Xue and Cui, Xiaohui},
  title     = {{GLEAM}: Enhanced Transferable Adversarial Attacks for Vision-Language Pre-training Models via Global-Local Transformations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1665--1674}
}
UniMLVG: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving: Rui Chen,

Zehuan Wu,

Yichen Liu,

Yuxin Guo,

Jingcheng Ni,

Haifeng Xia,

Siyu Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Rui and Wu, Zehuan and Liu, Yichen and Guo, Yuxin and Ni, Jingcheng and Xia, Haifeng and Xia, Siyu},
  title     = {{UniMLVG}: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25453--25463}
}
CaliMatch: Adaptive Calibration for Improving Safe Semi-supervised Learning: Jinsoo Bae,

Seoung Bum Kim,

Hyungrok Do; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2025_ICCV,
  author    = {Bae, Jinsoo and Kim, Seoung Bum and Do, Hyungrok},
  title     = {{CaliMatch}: Adaptive Calibration for Improving Safe Semi-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2867--2876}
}
CogCM: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement: Feixiang Wang,

Shuang Yang,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Feixiang and Yang, Shuang and Shan, Shiguang and Chen, Xilin},
  title     = {{CogCM}: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21408--21418}
}
Scalable Ranked Preference Optimization for Text-to-Image Generation: Shyamgopal Karthik,

Huseyin Coskun,

Zeynep Akata,

Sergey Tulyakov,

Jian Ren,

Anil Kag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karthik_2025_ICCV,
  author    = {Karthik, Shyamgopal and Coskun, Huseyin and Akata, Zeynep and Tulyakov, Sergey and Ren, Jian and Kag, Anil},
  title     = {Scalable Ranked Preference Optimization for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18399--18410}
}
Aligning Vision to Language: Annotation-Free Multimodal Knowledge Graph Construction for Enhanced LLMs Reasoning: Junming Liu,

Siyuan Meng,

Yanting Gao,

Song Mao,

Pinlong Cai,

Guohang Yan,

Yirong Chen,

Zilin Bian,

Ding Wang,

Botian Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Junming and Meng, Siyuan and Gao, Yanting and Mao, Song and Cai, Pinlong and Yan, Guohang and Chen, Yirong and Bian, Zilin and Wang, Ding and Shi, Botian},
  title     = {Aligning Vision to Language: Annotation-Free Multimodal Knowledge Graph Construction for Enhanced {LLMs} Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {981--992}
}
QR-LoRA: Efficient and Disentangled Fine-tuning via QR Decomposition for Customized Generation: Jiahui Yang,

Yongjia Ma,

Donglin Di,

Jianxun Cui,

Hao Li,

Wei Chen,

Yan Xie,

Xun Yang,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jiahui and Ma, Yongjia and Di, Donglin and Cui, Jianxun and Li, Hao and Chen, Wei and Xie, Yan and Yang, Xun and Zuo, Wangmeng},
  title     = {{QR-LoRA}: Efficient and Disentangled Fine-tuning via {QR} Decomposition for Customized Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17587--17597}
}
Uncertainty-Aware Diffusion-Guided Refinement of 3D Scenes: Sarosij Bose,

Arindam Dutta,

Sayak Nag,

Junge Zhang,

Jiachen Li,

Konstantinos Karydis,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bose_2025_ICCV,
  author    = {Bose, Sarosij and Dutta, Arindam and Nag, Sayak and Zhang, Junge and Li, Jiachen and Karydis, Konstantinos and Roy-Chowdhury, Amit K.},
  title     = {Uncertainty-Aware Diffusion-Guided Refinement of {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28271--28281}
}
GestureLSM: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling: Pinxin Liu,

Luchuan Song,

Junhua Huang,

Haiyang Liu,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Pinxin and Song, Luchuan and Huang, Junhua and Liu, Haiyang and Xu, Chenliang},
  title     = {{GestureLSM}: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10929--10939}
}
LLaFEA: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in LMMs: Hanyu Zhou,

Gim Hee Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hanyu and Lee, Gim Hee},
  title     = {{LLaFEA}: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in {LMMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22294--22304}
}
GCAV: A Global Concept Activation Vector Framework for Cross-Layer Consistency in Interpretability: Zhenghao He,

Sanchit Sinha,

Guangzhi Xiong,

Aidong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Zhenghao and Sinha, Sanchit and Xiong, Guangzhi and Zhang, Aidong},
  title     = {{GCAV}: A Global Concept Activation Vector Framework for Cross-Layer Consistency in Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {614--623}
}
FastPoint: Accelerating 3D Point Cloud Model Inference via Sample Point Distance Prediction: Donghyun Lee,

Dawoon Jeong,

Jae W. Lee,

Hongil Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Donghyun and Jeong, Dawoon and Lee, Jae W. and Yoon, Hongil},
  title     = {{FastPoint}: Accelerating {3D} Point Cloud Model Inference via Sample Point Distance Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25114--25123}
}
GSRecon: Efficient Generalizable Gaussian Splatting for Surface Reconstruction from Sparse Views: Hang Yang,

Le Hui,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hang and Hui, Le and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {{GSRecon}: Efficient Generalizable {Gaussian} Splatting for Surface Reconstruction from Sparse Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25346--25356}
}
WAVE: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image: Jiwoo Park,

Tae Eun Choi,

Youngjun Jun,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jiwoo and Choi, Tae Eun and Jun, Youngjun and Hwang, Seong Jae},
  title     = {{WAVE}: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11906--11915}
}
Blended Point Cloud Diffusion for Localized Text-guided Shape Editing: Etai Sella,

Noam Atia,

Ron Mokady,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sella_2025_ICCV,
  author    = {Sella, Etai and Atia, Noam and Mokady, Ron and Averbuch-Elor, Hadar},
  title     = {Blended Point Cloud Diffusion for Localized Text-guided Shape Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19119--19129},
}
Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations: Marcin Przewięźlikowski,

Randall Balestriero,

Wojciech Jasiński,

Marek Śmieja,

Bartosz Zieliński; [pdf] [supp]
[bibtex]
@InProceedings{Przewiezlikowski_2025_ICCV,
  author    = {Przewi{\k{e}}{\'z}likowski, Marcin and Balestriero, Randall and Jasi{\'n}ski, Wojciech and {\'S}mieja, Marek and Zieli{\'n}ski, Bartosz},
  title     = {Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23442--23452},
}
AnimateAnyMesh: A Feed-Forward 4D Foundation Model for Text-Driven Universal Mesh Animation: Zijie Wu,

Chaohui Yu,

Fan Wang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zijie and Yu, Chaohui and Wang, Fan and Bai, Xiang},
  title     = {{AnimateAnyMesh}: A Feed-Forward {4D} Foundation Model for Text-Driven Universal Mesh Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13557--13568},
}
PhysRig: Differentiable Physics-Based Skinning and Rigging Framework for Realistic Articulated Object Modeling: Hao Zhang,

Haolan Xu,

Chun Feng,

Varun Jampani,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hao and Xu, Haolan and Feng, Chun and Jampani, Varun and Ahuja, Narendra},
  title     = {{PhysRig}: Differentiable Physics-Based Skinning and Rigging Framework for Realistic Articulated Object Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6609--6620},
}
SpikePack: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility: Guobin Shen,

Jindong Li,

Tenglong Li,

Dongcheng Zhao,

Yi Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Guobin and Li, Jindong and Li, Tenglong and Zhao, Dongcheng and Zeng, Yi},
  title     = {{SpikePack}: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23385--23395},
}
Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis: Zhuokun Chen,

Jugang Fan,

Zhuowei Yu,

Bohan Zhuang,

Mingkui Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhuokun and Fan, Jugang and Yu, Zhuowei and Zhuang, Bohan and Tan, Mingkui},
  title     = {Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17140--17149},
}
Efficient Track Anything: Yunyang Xiong,

Chong Zhou,

Xiaoyu Xiang,

Lemeng Wu,

Chenchen Zhu,

Zechun Liu,

Saksham Suri,

Balakrishnan Varadarajan,

Ramya Akula,

Forrest Iandola,

Raghuraman Krishnamoorthi,

Bilge Soran,

Vikas Chandra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_ICCV,
  author    = {Xiong, Yunyang and Zhou, Chong and Xiang, Xiaoyu and Wu, Lemeng and Zhu, Chenchen and Liu, Zechun and Suri, Saksham and Varadarajan, Balakrishnan and Akula, Ramya and Iandola, Forrest and Krishnamoorthi, Raghuraman and Soran, Bilge and Chandra, Vikas},
  title     = {Efficient Track Anything},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11513--11524},
}
VFlowOpt: A Token Pruning Framework for LMMs with Visual Information Flow-Guided Optimization: Sihan Yang,

Runsen Xu,

Chenhang Cui,

Tai Wang,

Dahua Lin,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Sihan and Xu, Runsen and Cui, Chenhang and Wang, Tai and Lin, Dahua and Pang, Jiangmiao},
  title     = {{VFlowOpt}: A Token Pruning Framework for {LMMs} with Visual Information Flow-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23924--23934},
}
CameraCtrl II: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models: Hao He,

Ceyuan Yang,

Shanchuan Lin,

Yinghao Xu,

Meng Wei,

Liangke Gui,

Qi Zhao,

Gordon Wetzstein,

Lu Jiang,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Hao and Yang, Ceyuan and Lin, Shanchuan and Xu, Yinghao and Wei, Meng and Gui, Liangke and Zhao, Qi and Wetzstein, Gordon and Jiang, Lu and Li, Hongsheng},
  title     = {{CameraCtrl} {II}: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13416--13426},
}
Text-guided Visual Prompt DINO for Generic Segmentation: Yuchen Guan,

Chong Sun,

Canmiao Fu,

Zhipeng Huang,

Chun Yuan,

Chen Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yuchen and Sun, Chong and Fu, Canmiao and Huang, Zhipeng and Yuan, Chun and Li, Chen},
  title     = {Text-guided Visual Prompt {DINO} for Generic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21288--21298},
}
Open-set Cross Modal Generalization via Multimodal Unified Representation: Hai Huang,

Yan Xia,

Shulei Wang,

Hanting Wang,

Minghui Fang,

Shengpeng Ji,

Sashuai Zhou,

Tao Jin,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hai and Xia, Yan and Wang, Shulei and Wang, Hanting and Fang, Minghui and Ji, Shengpeng and Zhou, Sashuai and Jin, Tao and Zhao, Zhou},
  title     = {Open-set Cross Modal Generalization via Multimodal Unified Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {541--551},
}
PLADIS: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity: Kwanyoung Kim,

Byeongsu Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Kwanyoung and Sim, Byeongsu},
  title     = {{PLADIS}: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16238--16248},
}
GT-Loc: Unifying When and Where in Images Through a Joint Embedding Space: David G. Shatwell,

Ishan Rajendrakumar Dave,

Sirnam Swetha,

Mubarak Shah; [pdf] [supp]
[bibtex]
@InProceedings{Shatwell_2025_ICCV,
  author    = {Shatwell, David G. and Dave, Ishan Rajendrakumar and Swetha, Sirnam and Shah, Mubarak},
  title     = {{GT-Loc}: Unifying When and Where in Images Through a Joint Embedding Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1--11},
}
Robust Test-Time Adaptation for Single Image Denoising Using Deep Gaussian Prior: Qing Ma,

Pengwei Liang,

Xiong Zhou,

Jiayi Ma,

Junjun Jiang,

Zhe Peng; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Qing and Liang, Pengwei and Zhou, Xiong and Ma, Jiayi and Jiang, Junjun and Peng, Zhe},
  title     = {Robust Test-Time Adaptation for Single Image Denoising Using Deep {Gaussian} Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11230--11240},
}
MATE: Motion-Augmented Temporal Consistency for Event-based Point Tracking: Han Han,

Wei Zhai,

Yang Cao,

Bin Li,

Zheng-jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Han and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-jun},
  title     = {{MATE}: Motion-Augmented Temporal Consistency for Event-based Point Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8340--8349},
}
Diffusion-Based Extreme High-speed Scenes Reconstruction with the Complementary Vision Sensor: Yapeng Meng,

Yihan Lin,

Taoyi Wang,

Yuguo Chen,

Lijian Wang,

Rong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Yapeng and Lin, Yihan and Wang, Taoyi and Chen, Yuguo and Wang, Lijian and Zhao, Rong},
  title     = {Diffusion-Based Extreme High-speed Scenes Reconstruction with the Complementary Vision Sensor},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5701--5710},
}
RnGCam: High-speed video from rolling & global shutter measurements: Kevin Tandi,

Xiang Dai,

Chinmay Talegaonkar,

Gal Mishne,

Nick Antipa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tandi_2025_ICCV,
  author    = {Tandi, Kevin and Dai, Xiang and Talegaonkar, Chinmay and Mishne, Gal and Antipa, Nick},
  title     = {{RnGCam}: High-speed video from rolling \& global shutter measurements},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8830--8840},
}
OV3D-CG: Open-vocabulary 3D Instance Segmentation with Contextual Guidance: Mingquan Zhou,

Chen He,

Ruiping Wang,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Mingquan and He, Chen and Wang, Ruiping and Chen, Xilin},
  title     = {{OV3D-CG}: Open-vocabulary {3D} Instance Segmentation with Contextual Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5305--5314},
}
FG-OrIU: Towards Better Forgetting via Feature-Gradient Orthogonality for Incremental Unlearning: Qian Feng,

JiaHang Tu,

Mintong Kang,

Hanbin Zhao,

Chao Zhang,

Hui Qian; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Qian and Tu, JiaHang and Kang, Mintong and Zhao, Hanbin and Zhang, Chao and Qian, Hui},
  title     = {{FG-OrIU}: Towards Better Forgetting via Feature-Gradient Orthogonality for Incremental Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1957--1967},
}
InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models: Yifan Lu,

Xuanchi Ren,

Jiawei Yang,

Tianchang Shen,

Zhangjie Wu,

Jun Gao,

Yue Wang,

Siheng Chen,

Mike Chen,

Sanja Fidler,

Jiahui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yifan and Ren, Xuanchi and Yang, Jiawei and Shen, Tianchang and Wu, Zhangjie and Gao, Jun and Wang, Yue and Chen, Siheng and Chen, Mike and Fidler, Sanja and Huang, Jiahui},
  title     = {{InfiniCube}: Unbounded and Controllable Dynamic {3D} Driving Scene Generation with World-Guided Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27272--27283},
}
Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification: Guibao Shen,

Luozhou Wang,

Jiantao Lin,

Wenhang Ge,

Chaozhe Zhang,

Xin Tao,

Di Zhang,

Pengfei Wan,

Guangyong Chen,

Yijun Li,

Ying-cong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Guibao and Wang, Luozhou and Lin, Jiantao and Ge, Wenhang and Zhang, Chaozhe and Tao, Xin and Zhang, Di and Wan, Pengfei and Chen, Guangyong and Li, Yijun and Chen, Ying-cong},
  title     = {Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15437--15446},
}
After the Party: Navigating the Mapping From Color to Ambient Lighting: Florin-Alexandru Vasluianu,

Tim Seizinger,

Zongwei Wu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasluianu_2025_ICCV,
  author    = {Vasluianu, Florin-Alexandru and Seizinger, Tim and Wu, Zongwei and Timofte, Radu},
  title     = {After the Party: Navigating the Mapping From Color to Ambient Lighting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9218--9229},
}
Disentangled Clothed Avatar Generation with Layered Representation: Weitian Zhang,

Yichao Yan,

Sijing Wu,

Manwen Liao,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weitian and Yan, Yichao and Wu, Sijing and Liao, Manwen and Yang, Xiaokang},
  title     = {Disentangled Clothed Avatar Generation with Layered Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11327--11338},
}
Teleportraits: Training-Free People Insertion into Any Scene: Jialu Gao,

K J Joseph,

Fernando De La Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Jialu and Joseph, K J and De La Torre, Fernando},
  title     = {Teleportraits: Training-Free People Insertion into Any Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18866--18875},
}
DepthSync: Diffusion Guidance-Based Depth Synchronization for Scale- and Geometry-Consistent Video Depth Estimation: Yue-Jiang Dong,

Wang Zhao,

Jiale Xu,

Ying Shan,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Yue-Jiang and Zhao, Wang and Xu, Jiale and Shan, Ying and Zhang, Song-Hai},
  title     = {{DepthSync}: Diffusion Guidance-Based Depth Synchronization for Scale- and Geometry-Consistent Video Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5415--5425},
}
InfoBridge: Balanced Multimodal Integration through Conditional Dependency Modeling: Chenxin Li,

Yifan Liu,

Panwang Pan,

Hengyu Liu,

Xinyu Liu,

Wuyang Li,

Cheng Wang,

Weihao Yu,

Yiyang Lin,

Yixuan Yuan; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Chenxin and Liu, Yifan and Pan, Panwang and Liu, Hengyu and Liu, Xinyu and Li, Wuyang and Wang, Cheng and Yu, Weihao and Lin, Yiyang and Yuan, Yixuan},
  title     = {{InfoBridge}: Balanced Multimodal Integration through Conditional Dependency Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {393--404},
}
Towards Explicit Exoskeleton for the Reconstruction of Complicated 3D Human Avatars: Yifan Zhan,

Qingtian Zhu,

Muyao Niu,

Mingze Ma,

Jiancheng Zhao,

Zhihang Zhong,

Xiao Sun,

Yu Qiao,

Yinqiang Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Yifan and Zhu, Qingtian and Niu, Muyao and Ma, Mingze and Zhao, Jiancheng and Zhong, Zhihang and Sun, Xiao and Qiao, Yu and Zheng, Yinqiang},
  title     = {Towards Explicit Exoskeleton for the Reconstruction of Complicated {3D} Human Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14259--14269},
}
CABLD: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization: Soorena Salari,

Arash Harirpoush,

Hassan Rivaz,

Yiming Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Salari_2025_ICCV,
  author    = {Salari, Soorena and Harirpoush, Arash and Rivaz, Hassan and Xiao, Yiming},
  title     = {{CABLD}: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20991--21002},
}
One Object, Multiple Lies: A Benchmark for Cross-task Adversarial Attack on Unified Vision-Language Models: Jiale Zhao,

Xinyang Jiang,

Junyao Gao,

Yuhao Xue,

Cairong Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Jiale and Jiang, Xinyang and Gao, Junyao and Xue, Yuhao and Zhao, Cairong},
  title     = {One Object, Multiple Lies: A Benchmark for Cross-task Adversarial Attack on Unified Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {187--196},
}
A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization: Chi-Jui Ho,

Yash Belhe,

Steve Rotenberg,

Ravi Ramamoorthi,

Tzu-Mao Li,

Nicholas Antipa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2025_ICCV,
  author    = {Ho, Chi-Jui and Belhe, Yash and Rotenberg, Steve and Ramamoorthi, Ravi and Li, Tzu-Mao and Antipa, Nicholas},
  title     = {A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28042--28051},
}
CHROME: Clothed Human Reconstruction with Occlusion-Resilience and Multiview-Consistency from a Single Image: Arindam Dutta,

Meng Zheng,

Zhongpai Gao,

Benjamin Planche,

Anwesa Choudhuri,

Terrence Chen,

Amit K. Roy-Chowdhury,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutta_2025_ICCV,
  author    = {Dutta, Arindam and Zheng, Meng and Gao, Zhongpai and Planche, Benjamin and Choudhuri, Anwesa and Chen, Terrence and Roy-Chowdhury, Amit K. and Wu, Ziyan},
  title     = {{CHROME}: Clothed Human Reconstruction with Occlusion-Resilience and Multiview-Consistency from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9124--9135},
}
H3R: Hybrid Multi-view Correspondence for Generalizable 3D Reconstruction: Heng Jia,

Linchao Zhu,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Heng and Zhu, Linchao and Zhao, Na},
  title     = {{H3R}: Hybrid Multi-view Correspondence for Generalizable {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7655--7665},
}
Capturing head avatar with hand contacts from a monocular video: Haonan He,

Yufeng Zheng,

Jie Song; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Haonan and Zheng, Yufeng and Song, Jie},
  title     = {Capturing head avatar with hand contacts from a monocular video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13099--13108},
}
DoppDrive: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection: Yuval Haitman,

Oded Bialer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haitman_2025_ICCV,
  author    = {Haitman, Yuval and Bialer, Oded},
  title     = {{DoppDrive}: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26085--26094},
}
3D Gaussian Map with Open-Set Semantic Grouping for Vision-Language Navigation: Jianzhe Gao,

Rui Liu,

Wenguan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Jianzhe and Liu, Rui and Wang, Wenguan},
  title     = {{3D} {Gaussian} Map with Open-Set Semantic Grouping for Vision-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9252--9262},
}
M-SpecGene: Generalized Foundation Model for RGBT Multispectral Vision: Kailai Zhou,

Fuqiang Yang,

Shixian Wang,

Bihan Wen,

Chongde Zi,

Linsen Chen,

Qiu Shen,

Xun Cao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Kailai and Yang, Fuqiang and Wang, Shixian and Wen, Bihan and Zi, Chongde and Chen, Linsen and Shen, Qiu and Cao, Xun},
  title     = {{M-SpecGene}: Generalized Foundation Model for {RGBT} Multispectral Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7861--7872},
}
YOLOE: Real-Time Seeing Anything: Ao Wang,

Lihao Liu,

Hui Chen,

Zijia Lin,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ao and Liu, Lihao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
  title     = {{YOLOE}: Real-Time Seeing Anything},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24591--24602},
}
Joint Learning of Pose Regression and Denoising Diffusion with Score Scaling Sampling for Category-level 6D Pose Estimation: Seunghyun Lee,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunghyun and Kim, Tae-Kyun},
  title     = {Joint Learning of Pose Regression and Denoising Diffusion with Score Scaling Sampling for Category-level {6D} Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5757--5768},
}
Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in VLMs: Qizhe Zhang,

Aosong Cheng,

Ming Lu,

Renrui Zhang,

Zhiyong Zhuo,

Jiajun Cao,

Shaobo Guo,

Qi She,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qizhe and Cheng, Aosong and Lu, Ming and Zhang, Renrui and Zhuo, Zhiyong and Cao, Jiajun and Guo, Shaobo and She, Qi and Zhang, Shanghang},
  title     = {Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20857--20867},
}
Event-guided HDR Reconstruction with Diffusion Priors: Yixin Yang,

Jiawei Zhang,

Yang Zhang,

Yunxuan Wei,

Dongqing Zou,

Jimmy S. Ren,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yixin and Zhang, Jiawei and Zhang, Yang and Wei, Yunxuan and Zou, Dongqing and Ren, Jimmy S. and Shi, Boxin},
  title     = {Event-guided {HDR} Reconstruction with Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11787--11796},
}
Continual Adaptation: Environment-Conditional Parameter Generation for Object Detection in Dynamic Scenarios: Deng Li,

Aming Wu,

Yang Li,

Yaowei Wang,

Yahong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Deng and Wu, Aming and Li, Yang and Wang, Yaowei and Han, Yahong},
  title     = {Continual Adaptation: Environment-Conditional Parameter Generation for Object Detection in Dynamic Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4434--4443},
}
CATP-LLM: Empowering Large Language Models for Cost-Aware Tool Planning: Duo Wu,

Jinghe Wang,

Yuan Meng,

Yanning Zhang,

Le Sun,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Duo and Wang, Jinghe and Meng, Yuan and Zhang, Yanning and Sun, Le and Wang, Zhi},
  title     = {{CATP-LLM}: Empowering Large Language Models for Cost-Aware Tool Planning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8699--8709},
}
MoMaps: Semantics-Aware Scene Motion Generation with Motion Maps: Jiahui Lei,

Kyle Genova,

George Kopanas,

Noah Snavely,

Leonidas Guibas; [pdf] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Jiahui and Genova, Kyle and Kopanas, George and Snavely, Noah and Guibas, Leonidas},
  title     = {{MoMaps}: Semantics-Aware Scene Motion Generation with Motion Maps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10022--10031},
}
WarpHE4D: Dense 4D Head Map toward Full Head Reconstruction: Jongseob Yun,

Yong-Hoon Kwon,

Min-Gyu Park,

Ju-Mi Kang,

Min-Ho Lee,

Inho Chang,

Ju Hong Yoon,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Yun_2025_ICCV,
  author    = {Yun, Jongseob and Kwon, Yong-Hoon and Park, Min-Gyu and Kang, Ju-Mi and Lee, Min-Ho and Chang, Inho and Yoon, Ju Hong and Yoon, Kuk-Jin},
  title     = {{WarpHE4D}: Dense {4D} Head Map toward Full Head Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11480--11490},
}
Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models: Hongyang Wei,

Shuaizheng Liu,

Chun Yuan,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Hongyang and Liu, Shuaizheng and Yuan, Chun and Zhang, Lei},
  title     = {Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18640--18650},
}
TRKT: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring: Zhu Xu,

Ting Lei,

Zhimin Li,

Guan Wang,

Qingchao Chen,

Yuxin Peng,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhu and Lei, Ting and Li, Zhimin and Wang, Guan and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {{TRKT}: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15812--15821},
}
Progressive Distribution Bridging: Unsupervised Adaptation for Large-scale Pre-trained Models via Adaptive Auxiliary Data: Weinan He,

Yixin Zhang,

Zilei Wang; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Weinan and Zhang, Yixin and Wang, Zilei},
  title     = {Progressive Distribution Bridging: Unsupervised Adaptation for Large-scale Pre-trained Models via Adaptive Auxiliary Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3280--3292},
}
Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring: Yufei Zhan,

Shurong Zheng,

Yousong Zhu,

Hongyin Zhao,

Fan Yang,

Ming Tang,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Yufei and Zheng, Shurong and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Tang, Ming and Wang, Jinqiao},
  title     = {Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22947--22957},
}
TARO: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis: Tri Ton,

Ji Woo Hong,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ton_2025_ICCV,
  author    = {Ton, Tri and Hong, Ji Woo and Yoo, Chang D.},
  title     = {{TARO}: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14228--14237},
}
VPR-Cloak: A First Look at Privacy Cloak Against Visual Place Recognition: Shuting Dong,

Mingzhi Chen,

Feng Lu,

Hao Yu,

Guanghao Li,

Zhe Wu,

Ming Tang,

Chun Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Shuting and Chen, Mingzhi and Lu, Feng and Yu, Hao and Li, Guanghao and Wu, Zhe and Tang, Ming and Yuan, Chun},
  title     = {{VPR-Cloak}: A First Look at Privacy Cloak Against Visual Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7197--7208},
}
SDFormer: Vision-based 3D Semantic Scene Completion via SAM-assisted Dual-channel Voxel Transformer: Yujie Xue,

Huilong Pi,

Jiapeng Zhang,

Yunchuan Qin,

Zhuo Tang,

Kenli Li,

Ruihui Li; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Yujie and Pi, Huilong and Zhang, Jiapeng and Qin, Yunchuan and Tang, Zhuo and Li, Kenli and Li, Ruihui},
  title     = {{SDFormer}: Vision-based {3D} Semantic Scene Completion via {SAM}-assisted Dual-channel Voxel Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26837--26847},
}
ModSkill: Physical Character Skill Modularization: Yiming Huang,

Zhiyang Dou,

Lingjie Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yiming and Dou, Zhiyang and Liu, Lingjie},
  title     = {{ModSkill}: Physical Character Skill Modularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12394--12404},
}
Improving Multimodal Learning via Imbalanced Learning: Shicai Wei,

Chunbo Luo,

Yang Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shicai and Luo, Chunbo and Luo, Yang},
  title     = {Improving Multimodal Learning via Imbalanced Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2250--2259},
}
INTER: Mitigating Hallucination in Large Vision-Language Models by Interaction Guidance Sampling: Xin Dong,

Shichao Dong,

Jin Wang,

Jing Huang,

Li Zhou,

Zenghui Sun,

Lihua Jing,

Jinsong Lan,

Xiaoyong Zhu,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Xin and Dong, Shichao and Wang, Jin and Huang, Jing and Zhou, Li and Sun, Zenghui and Jing, Lihua and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo},
  title     = {{INTER}: Mitigating Hallucination in Large Vision-Language Models by Interaction Guidance Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2534--2544},
}
ForestFormer3D: A Unified Framework for End-to-End Segmentation of Forest LiDAR 3D Point Clouds: Binbin Xiang,

Maciej Wielgosz,

Stefano Puliti,

Kamil Král,

Martin Krůček,

Azim Missarov,

Rasmus Astrup; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Binbin and Wielgosz, Maciej and Puliti, Stefano and Kr{\'a}l, Kamil and Kr{\r{u}}{\v{c}}ek, Martin and Missarov, Azim and Astrup, Rasmus},
  title     = {{ForestFormer3D}: A Unified Framework for End-to-End Segmentation of Forest {LiDAR} {3D} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24717--24727},
}
Medical World Model: Yijun Yang,

Zhao-Yang Wang,

Qiuping Liu,

Shuwen Sun,

Kang Wang,

Rama Chellappa,

Zongwei Zhou,

Alan Yuille,

Lei Zhu,

Yu-Dong Zhang,

Jieneng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yijun and Wang, Zhao-Yang and Liu, Qiuping and Sun, Shuwen and Wang, Kang and Chellappa, Rama and Zhou, Zongwei and Yuille, Alan and Zhu, Lei and Zhang, Yu-Dong and Chen, Jieneng},
  title     = {Medical World Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8319--8329}
}
AccidentalGS: 3D Gaussian Splatting from Accidental Camera Motion: Mao Mao,

Xujie Shen,

Guyuan Chen,

Boming Zhao,

Jiarui Hu,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Mao and Shen, Xujie and Chen, Guyuan and Zhao, Boming and Hu, Jiarui and Bao, Hujun and Cui, Zhaopeng},
  title     = {{AccidentalGS}: {3D} {Gaussian} Splatting from Accidental Camera Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27445--27455}
}
RoMo: Robust Motion Segmentation Improves Structure from Motion: Lily Goli,

Sara Sabour,

Mark Matthews,

Marcus A. Brubaker,

Dmitry Lagun,

Alec Jacobson,

David J. Fleet,

Saurabh Saxena,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goli_2025_ICCV,
  author    = {Goli, Lily and Sabour, Sara and Matthews, Mark and Brubaker, Marcus A. and Lagun, Dmitry and Jacobson, Alec and Fleet, David J. and Saxena, Saurabh and Tagliasacchi, Andrea},
  title     = {{RoMo}: Robust Motion Segmentation Improves Structure from Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6155--6164}
}
CAFA: a Controllable Automatic Foley Artist: Roi Benita,

Michael Finkelson,

Tavi Halperin,

Gleb Sterkin,

Yossi Adi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benita_2025_ICCV,
  author    = {Benita, Roi and Finkelson, Michael and Halperin, Tavi and Sterkin, Gleb and Adi, Yossi},
  title     = {{CAFA}: a Controllable Automatic {Foley} Artist},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15917--15926}
}
Neuroverse3D: Developing In-Context Learning Universal Model for Neuroimaging in 3D: Jiesi Hu,

Hanyang Peng,

Yanwu Yang,

Xutao Guo,

Yang Shang,

Pengcheng Shi,

Chenfei Ye,

Ting Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Jiesi and Peng, Hanyang and Yang, Yanwu and Guo, Xutao and Shang, Yang and Shi, Pengcheng and Ye, Chenfei and Ma, Ting},
  title     = {{Neuroverse3D}: Developing In-Context Learning Universal Model for Neuroimaging in {3D}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21721--21731}
}
ProSAM: Enhancing the Robustness of SAM-based Visual Reference Segmentation with Probabilistic Prompts: Xiaoqi Wang,

Clint Sebastian,

Wenbin He,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiaoqi and Sebastian, Clint and He, Wenbin and Ren, Liu},
  title     = {{ProSAM}: Enhancing the Robustness of {SAM}-based Visual Reference Segmentation with Probabilistic Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20487--20496}
}
TF-TI2I: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models: Teng-Fang Hsiao,

Bo-Kai Ruan,

Yi-Lun Wu,

Tzu-Ling Lin,

Hong-Han Shuai; [pdf] [supp]
[bibtex]
@InProceedings{Hsiao_2025_ICCV,
  author    = {Hsiao, Teng-Fang and Ruan, Bo-Kai and Wu, Yi-Lun and Lin, Tzu-Ling and Shuai, Hong-Han},
  title     = {{TF-TI2I}: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18377--18387}
}
Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization: Weiying Xie,

Zihan Meng,

Jitao Ma,

Wenjin Guo,

Haowei Li,

Haonan Qin,

Leyuan Fang,

Yunsong Li; [pdf]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Weiying and Meng, Zihan and Ma, Jitao and Guo, Wenjin and Li, Haowei and Qin, Haonan and Fang, Leyuan and Li, Yunsong},
  title     = {Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24615--24624}
}
CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction: Zhefei Gong,

Pengxiang Ding,

Shangke Lyu,

Siteng Huang,

Mingyang Sun,

Wei Zhao,

Zhaoxin Fan,

Donglin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Zhefei and Ding, Pengxiang and Lyu, Shangke and Huang, Siteng and Sun, Mingyang and Zhao, Wei and Fan, Zhaoxin and Wang, Donglin},
  title     = {{CARP}: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13460--13470}
}
SAMora: Enhancing SAM through Hierarchical Self-Supervised Pre-Training for Medical Images: Shuhang Chen,

Hangjie Yuan,

Pengwei Liu,

Hanxue Gu,

Tao Feng,

Dong Ni; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Shuhang and Yuan, Hangjie and Liu, Pengwei and Gu, Hanxue and Feng, Tao and Ni, Dong},
  title     = {{SAMora}: Enhancing {SAM} through Hierarchical Self-Supervised Pre-Training for Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21209--21219}
}
MagicMotion: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance: Quanhao Li,

Zhen Xing,

Rui Wang,

Hui Zhang,

Qi Dai,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Quanhao and Xing, Zhen and Wang, Rui and Zhang, Hui and Dai, Qi and Wu, Zuxuan},
  title     = {{MagicMotion}: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12112--12123}
}
Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning: Zhi-Wei Xia,

Kun-Yu Lin,

Yuan-Ming Li,

Wei-Jin Huang,

Xian-Tuo Tan,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Zhi-Wei and Lin, Kun-Yu and Li, Yuan-Ming and Huang, Wei-Jin and Tan, Xian-Tuo and Zheng, Wei-Shi},
  title     = {Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12894--12903}
}
PathDiff: Histopathology Image Synthesis with Unpaired Text and Mask Conditions: Mahesh Bhosale,

Abdul Wasi,

Yuanhao Zhai,

Yunjie Tian,

Samuel Border,

Nan Xi,

Pinaki Sarder,

Junsong Yuan,

David Doermann,

Xuan Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhosale_2025_ICCV,
  author    = {Bhosale, Mahesh and Wasi, Abdul and Zhai, Yuanhao and Tian, Yunjie and Border, Samuel and Xi, Nan and Sarder, Pinaki and Yuan, Junsong and Doermann, David and Gong, Xuan},
  title     = {{PathDiff}: Histopathology Image Synthesis with Unpaired Text and Mask Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22415--22424}
}
Robust Low-light Scene Restoration via Illumination Transition: Ze Li,

Feng Zhang,

Xiatian Zhu,

Meng Zhang,

Yanghong Zhou,

P. Y. Mok; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ze and Zhang, Feng and Zhu, Xiatian and Zhang, Meng and Zhou, Yanghong and Mok, P. Y.},
  title     = {Robust Low-light Scene Restoration via Illumination Transition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6188--6197}
}
GVDepth: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion: Karlo Koledić,

Luka Petrović,

Ivan Marković,

Ivan Petrović; [pdf] [supp]
[bibtex]
@InProceedings{Koledic_2025_ICCV,
  author    = {Koledi{\'c}, Karlo and Petrovi{\'c}, Luka and Markovi{\'c}, Ivan and Petrovi{\'c}, Ivan},
  title     = {{GVDepth}: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26126--26135}
}
VISION-XL: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models: Taesung Kwon,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Kwon_2025_ICCV,
  author    = {Kwon, Taesung and Ye, Jong Chul},
  title     = {{VISION-XL}: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10465--10474}
}
Robin3D: Improving 3D Large Language Model via Robust Instruction Tuning: Weitai Kang,

Haifeng Huang,

Yuzhang Shang,

Mubarak Shah,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Weitai and Huang, Haifeng and Shang, Yuzhang and Shah, Mubarak and Yan, Yan},
  title     = {{Robin3D}: Improving {3D} Large Language Model via Robust Instruction Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3905--3915}
}
Mixture of Experts Guided by Gaussian Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection: Giacomo D'Amicantonio,

Snehashis Majhi,

Quan Kong,

Lorenzo Garattoni,

Gianpiero Francesca,

Francois Bremond,

Egor Bondarev; [pdf] [supp]
[bibtex]
@InProceedings{Amicantonio_2025_ICCV,
  author    = {D'Amicantonio, Giacomo and Majhi, Snehashis and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bremond, Francois and Bondarev, Egor},
  title     = {Mixture of Experts Guided by {Gaussian} Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10275--10285}
}
ProJudge: A Multi-Modal Multi-Discipline Benchmark and Instruction-Tuning Dataset for MLLM-based Process Judges: Jiaxin Ai,

Pengfei Zhou,

Zhaopan Xu,

Ming Li,

Fanrui Zhang,

Zizhen Li,

Jianwen Sun,

Yukang Feng,

Baojin Huang,

Zhongyuan Wang,

Kaipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ai_2025_ICCV,
  author    = {Ai, Jiaxin and Zhou, Pengfei and Xu, Zhaopan and Li, Ming and Zhang, Fanrui and Li, Zizhen and Sun, Jianwen and Feng, Yukang and Huang, Baojin and Wang, Zhongyuan and Zhang, Kaipeng},
  title     = {{ProJudge}: A Multi-Modal Multi-Discipline Benchmark and Instruction-Tuning Dataset for {MLLM}-based Process Judges},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4681--4690}
}
GloPER: Unsupervised Animal Pattern Extraction from Local Reconstruction: Bowen Chen,

Yun Sing Koh,

Gillian Dobbie; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Bowen and Koh, Yun Sing and Dobbie, Gillian},
  title     = {{GloPER}: Unsupervised Animal Pattern Extraction from Local Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6519--6529}
}
One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory: Chenhao Zheng,

Jieyu Zhang,

Mohammadreza Salehi,

Ziqi Gao,

Vishnu Iyengar,

Norimasa Kobori,

Quan Kong,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Chenhao and Zhang, Jieyu and Salehi, Mohammadreza and Gao, Ziqi and Iyengar, Vishnu and Kobori, Norimasa and Kong, Quan and Krishna, Ranjay},
  title     = {One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23156--23166}
}
Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures: Xinlong Ding,

Hongwei Yu,

Jiawei Li,

Feifan Li,

Yu Shang,

Bochao Zou,

Huimin Ma,

Jiansheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Xinlong and Yu, Hongwei and Li, Jiawei and Li, Feifan and Shang, Yu and Zou, Bochao and Ma, Huimin and Chen, Jiansheng},
  title     = {Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28483--28492}
}
COSTARR: Consolidated Open Set Technique with Attenuation for Robust Recognition: Ryan Rabinowitz,

Steve Cruz,

Walter Scheirer,

Terrance E. Boult; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rabinowitz_2025_ICCV,
  author    = {Rabinowitz, Ryan and Cruz, Steve and Scheirer, Walter and Boult, Terrance E.},
  title     = {{COSTARR}: Consolidated Open Set Technique with Attenuation for Robust Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4146--4155}
}
Agreement aware and dissimilarity oriented GLOM: Ru Zeng,

Yan Song,

Yang Zhang,

Yanling Hu,

Hui Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Ru and Song, Yan and Zhang, Yang and Hu, Yanling and Yu, Hui},
  title     = {Agreement aware and dissimilarity oriented {GLOM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24351--24359}
}
Att-Adapter: A Robust and Precise Domain-Specific Multi-Attributes T2I Diffusion Adapter via Conditional Variational Autoencoder: Wonwoong Cho,

Yan-Ying Chen,

Matthew Klenk,

David I. Inouye,

Yanxia Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Wonwoong and Chen, Yan-Ying and Klenk, Matthew and Inouye, David I. and Zhang, Yanxia},
  title     = {{Att-Adapter}: A Robust and Precise Domain-Specific Multi-Attributes {T2I} Diffusion Adapter via Conditional Variational Autoencoder},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15626--15635}
}
PERSONA: Personalized Whole-Body 3D Avatar with Pose-Driven Deformations from a Single Image: Geonhee Sim,

Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sim_2025_ICCV,
  author    = {Sim, Geonhee and Moon, Gyeongsik},
  title     = {{PERSONA}: Personalized Whole-Body {3D} Avatar with Pose-Driven Deformations from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12670--12680}
}
PanoSplatt3R: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction: Jiahui Ren,

Mochu Xiang,

Jiajun Zhu,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Jiahui and Xiang, Mochu and Zhu, Jiajun and Dai, Yuchao},
  title     = {{PanoSplatt3R}: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28959--28969}
}
CCMNet: Leveraging Calibrated Color Correction Matrices for Cross-Camera Color Constancy: Dongyoung Kim,

Mahmoud Afifi,

Dongyun Kim,

Michael S. Brown,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongyoung and Afifi, Mahmoud and Kim, Dongyun and Brown, Michael S. and Kim, Seon Joo},
  title     = {{CCMNet}: Leveraging Calibrated Color Correction Matrices for Cross-Camera Color Constancy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6198--6208}
}
Seeing 3D Through 2D Lenses: 3D Few-Shot Class-Incremental Learning via Cross-Modal Geometric Rectification: Tuo Xiang,

Xuemiao Xu,

Bangzhen Liu,

Jinyi Li,

Yong Li,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Tuo and Xu, Xuemiao and Liu, Bangzhen and Li, Jinyi and Li, Yong and He, Shengfeng},
  title     = {Seeing {3D} Through {2D} Lenses: {3D} Few-Shot Class-Incremental Learning via Cross-Modal Geometric Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6761--6771}
}
Secure On-Device Video OOD Detection Without Backpropagation: Shawn Li,

Peilin Cai,

Yuxiao Zhou,

Zhiyu Ni,

Renjie Liang,

You Qin,

Yi Nian,

Zhengzhong Tu,

Xiyang Hu,

Yue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shawn and Cai, Peilin and Zhou, Yuxiao and Ni, Zhiyu and Liang, Renjie and Qin, You and Nian, Yi and Tu, Zhengzhong and Hu, Xiyang and Zhao, Yue},
  title     = {Secure On-Device Video {OOD} Detection Without Backpropagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {112--121}
}
SmolDocling: An ultra-compact vision-language model for end-to-end multi-modal document conversion: Ahmed Nassar,

Matteo Omenetti,

Maksym Lysak,

Nikolaos Livathinos,

Christoph Auer,

Lucas Morin,

Rafael Teixeira de Lima,

Yusik Kim,

A. Said Gurbuz,

Michele Dolfi,

Peter W. J. Staar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nassar_2025_ICCV,
  author    = {Nassar, Ahmed and Omenetti, Matteo and Lysak, Maksym and Livathinos, Nikolaos and Auer, Christoph and Morin, Lucas and de Lima, Rafael Teixeira and Kim, Yusik and Gurbuz, A. Said and Dolfi, Michele and Staar, Peter W. J.},
  title     = {{SmolDocling}: An ultra-compact vision-language model for end-to-end multi-modal document conversion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21972--21983}
}
Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation: Wenliang Zhong,

Rob Barton,

Weizhi An,

Feng Jiang,

Hehuan Ma,

Yuzhi Guo,

Abhishek Dan,

Shioulin Sam,

Karim Bouyarmane,

Junzhou Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Wenliang and Barton, Rob and An, Weizhi and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Dan, Abhishek and Sam, Shioulin and Bouyarmane, Karim and Huang, Junzhou},
  title     = {Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22221--22231}
}
Underwater Visual SLAM with Depth Uncertainty and Medium Modeling: Rui Liu,

Sheng Fan,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Rui and Fan, Sheng and Wang, Wenguan and Yang, Yi},
  title     = {Underwater Visual {SLAM} with Depth Uncertainty and Medium Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {970--980}
}
Blind Video Super-Resolution based on Implicit Kernels: Qiang Zhu,

Yuxuan Jiang,

Shuyuan Zhu,

Fan Zhang,

David Bull,

Bing Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qiang and Jiang, Yuxuan and Zhu, Shuyuan and Zhang, Fan and Bull, David and Zeng, Bing},
  title     = {Blind Video Super-Resolution based on Implicit Kernels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10971--10981}
}
AMDANet: Attention-Driven Multi-Perspective Discrepancy Alignment for RGB-Infrared Image Fusion and Segmentation: Haifeng Zhong,

Fan Tang,

Zhuo Chen,

Hyung Jin Chang,

Yixing Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Haifeng and Tang, Fan and Chen, Zhuo and Chang, Hyung Jin and Gao, Yixing},
  title     = {{AMDANet}: Attention-Driven Multi-Perspective Discrepancy Alignment for {RGB}-Infrared Image Fusion and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10645--10655}
}
Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer: Md Ashiqur Rahman,

Chiao-An Yang,

Michael N. Cheng,

Lim Jun Hao,

Jeremiah Jiang,

Teck-Yian Lim,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2025_ICCV,
  author    = {Rahman, Md Ashiqur and Yang, Chiao-An and Cheng, Michael N. and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.},
  title     = {Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10527--10537}
}
Auxiliary Prompt Tuning of Vision-Language Models for Few-Shot Out-of-Distribution Detection: Wenjun Miao,

Guansong Pang,

Zihan Wang,

Jin Zheng,

Xiao Bai; [pdf]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Wenjun and Pang, Guansong and Wang, Zihan and Zheng, Jin and Bai, Xiao},
  title     = {Auxiliary Prompt Tuning of Vision-Language Models for Few-Shot Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4776--4785}
}
TACO: Taming Diffusion for in-the-wild Video Amodal Completion: Ruijie Lu,

Yixin Chen,

Yu Liu,

Jiaxiang Tang,

Junfeng Ni,

Diwen Wan,

Gang Zeng,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Ruijie and Chen, Yixin and Liu, Yu and Tang, Jiaxiang and Ni, Junfeng and Wan, Diwen and Zeng, Gang and Huang, Siyuan},
  title     = {{TACO}: Taming Diffusion for in-the-wild Video Amodal Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13638--13650}
}
UniGS: Modeling Unitary 3D Gaussians for Novel View Synthesis from Sparse-view Images: Jiamin Wu,

Kenkun Liu,

Xiaoke Jiang,

Yuan Yao,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiamin and Liu, Kenkun and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei},
  title     = {{UniGS}: Modeling Unitary {3D} {Gaussians} for Novel View Synthesis from Sparse-view Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26241--26251}
}
ObjectGS: Object-aware Scene Reconstruction and Scene Understanding via Gaussian Splatting: Ruijie Zhu,

Mulin Yu,

Linning Xu,

Lihan Jiang,

Yixuan Li,

Tianzhu Zhang,

Jiangmiao Pang,

Bo Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Ruijie and Yu, Mulin and Xu, Linning and Jiang, Lihan and Li, Yixuan and Zhang, Tianzhu and Pang, Jiangmiao and Dai, Bo},
  title     = {{ObjectGS}: Object-aware Scene Reconstruction and Scene Understanding via {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8350--8360}
}
SP2T: Sparse Proxy Attention for Dual-stream Point Transformer: Jiaxu Wan,

Hong Zhang,

Ziqi He,

Yangyan Deng,

Qishu Wang,

Ding Yuan,

Yifan Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Jiaxu and Zhang, Hong and He, Ziqi and Deng, Yangyan and Wang, Qishu and Yuan, Ding and Yang, Yifan},
  title     = {{SP2T}: Sparse Proxy Attention for Dual-stream Point Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27885--27895}
}
Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos: Yu'ang Feng,

Shuyong Gao,

Fuzhen Yan,

Yicheng Song,

Lingyi Hong,

Junjie Hu,

Wenqiang Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yu'ang and Gao, Shuyong and Yan, Fuzhen and Song, Yicheng and Hong, Lingyi and Hu, Junjie and Zhang, Wenqiang},
  title     = {Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13043--13052}
}
Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers: An-Lun Liu,

Yu-Wei Chao,

Yi-Ting Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, An-Lun and Chao, Yu-Wei and Chen, Yi-Ting},
  title     = {Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10375--10385}
}; Back