Papers
- Back
kh: Symmetry Understanding of 3D Shapes via Chirality Disentanglement-
[pdf]
[supp]
[bibtex]
% NOTE(review): the leading "kh" in the title may be a transliteration of the
% Greek letter chi; confirm against the published paper before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weikang and Wei{\ss}berg, Tobias and El Amrani, Nafie and Bernard, Florian},
  title     = {kh: Symmetry Understanding of {3D} Shapes via Chirality Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28292--28302}
}
Efficient Adaptation of Pre-trained Vision Transformer underpinned by Approximately Orthogonal Fine-Tuning Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yiting and Luo, Hao and Sun, Yuan and Yan, Qingsen and Zhang, Haokui and Dong, Wei and Wang, Guoqing and Wang, Peng and Yang, Yang and Shen, Hengtao},
  title     = {Efficient Adaptation of Pre-trained Vision Transformer underpinned by Approximately Orthogonal Fine-Tuning Strategy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4878--4887}
}
MM-IFEngine: Towards Multimodal Instruction Following-
[pdf]
[supp]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Shengyuan and Wu, Shenxi and Zhao, Xiangyu and Zang, Yuhang and Duan, Haodong and Dong, Xiaoyi and Zhang, Pan and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
  title     = {{MM-IFEngine}: Towards Multimodal Instruction Following},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1099--1109}
}
Who is a Better Talker: Subjective and Objective Quality Assessment for AI-Generated Talking Heads-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yingjie and Cao, Jiezhang and Zhang, Zicheng and Wen, Farong and Jiang, Yanwei and Jia, Jun and Liu, Xiaohong and Min, Xiongkuo and Zhai, Guangtao},
  title     = {Who is a Better Talker: Subjective and Objective Quality Assessment for {AI-Generated} Talking Heads},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12201--12211}
}
LayerAnimate: Layer-level Control for Animation-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Yang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Yang_2025_ICCV_LayerAnimate,
  author    = {Yang, Yuxue and Fan, Lue and Lin, Zuzeng and Wang, Feng and Zhang, Zhaoxiang},
  title     = {{LayerAnimate}: Layer-level Control for Animation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10865--10874}
}
Towards a Unified Copernicus Foundation Model for Earth Vision-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Wang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Wang_2025_ICCV_Copernicus,
  author    = {Wang, Yi and Xiong, Zhitong and Liu, Chenying and Stewart, Adam J. and Dujardin, Thomas and Bountos, Nikolaos Ioannis and Zavras, Angelos and Gerken, Franziska and Papoutsis, Ioannis and Leal-Taix{\'e}, Laura and Zhu, Xiao Xiang},
  title     = {Towards a Unified {Copernicus} Foundation Model for {Earth} Vision},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9888--9899}
}
ROADWork: A Dataset and Benchmark for Learning to Recognize, Observe, Analyze and Drive Through Work Zones-
[pdf]
[supp]
[bibtex]
@InProceedings{Ghosh_2025_ICCV,
  author    = {Ghosh, Anurag and Zheng, Shen and Tamburo, Robert and Vuong, Khiem and Alvarez-Padilla, Juan and Zhu, Hailiang and Cardei, Michael and Dunn, Nicholas and Mertz, Christoph and Narasimhan, Srinivasa G.},
  title     = {{ROADWork}: A Dataset and Benchmark for Learning to Recognize, Observe, Analyze and Drive Through Work Zones},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6132--6142}
}
Gradient Decomposition and Alignment for Incremental Object Detection-
[pdf]
[supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Wenlong and Zhang, Shizhou and Cheng, De and Xing, Yinghui and Liang, Guoqiang and Wang, Peng and Zhang, Yanning},
  title     = {Gradient Decomposition and Alignment for Incremental Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4486--4495}
}
One Polyp Identifies All: One-Shot Polyp Segmentation with SAM via Cascaded Priors and Iterative Prompt Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Xinyu and Xing, Xiaohan and Meng, Fei and Liu, Jianbang and Bai, Fan and Nie, Qiang and Meng, Max},
  title     = {One Polyp Identifies All: One-Shot Polyp Segmentation with {SAM} via Cascaded Priors and Iterative Prompt Evolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24182--24191}
}
Gradient Extrapolation for Debiased Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Asaad_2025_ICCV,
  author    = {Asaad, Ihab and Shadaydeh, Maha and Denzler, Joachim},
  title     = {Gradient Extrapolation for Debiased Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3819--3829}
}
From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning-
[pdf]
[supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yexin and Lin, Yongbin and Yue, Lishengsa and Yao, Zhihong and Wang, Jie},
  title     = {From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26146--26155}
}
Less-to-More Generalization: Unlocking More Controllability by In-Context Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaojin and Huang, Mengqi and Wu, Wenxu and Cheng, Yufeng and Ding, Fei and He, Qian},
  title     = {Less-to-More Generalization: Unlocking More Controllability by In-Context Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18682--18692}
}
Improving Large Vision and Language Models by Learning from a Panel of Peers-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Hernandez_2025_ICCV,
  author    = {Hernandez, Jefferson and Shi, Jing and Jenni, Simon and Ordonez, Vicente and Kafle, Kushal},
  title     = {Improving Large Vision and Language Models by Learning from a Panel of Peers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1402--1412}
}
Federated Representation Angle Learning-
[pdf]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Liping and Yu, Han and Wang, Gang and Liu, Xiaoguang and Li, Xiaoxiao},
  title     = {Federated Representation Angle Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1314--1324}
}
Why LVLMs Are More Prone to Hallucinations in Longer Responses: The Role of Context-
[pdf]
[supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Ge and Qian, Jiaye and Tang, Jiajin and Yang, Sibei},
  title     = {Why {LVLMs} Are More Prone to Hallucinations in Longer Responses: The Role of Context},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4101--4113}
}
Training-Free Personalization via Retrieval and Reasoning on Fingerprints-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Das_2025_ICCV,
  author    = {Das, Deepayan and Talon, Davide and Wang, Yiming and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Training-Free Personalization via Retrieval and Reasoning on Fingerprints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9683--9692}
}
How Far are AI-generated Videos from Simulating the 3D Visual World: A Learned 3D Evaluation Approach-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Chirui and Liu, Jiahui and Liu, Zhengzhe and Lyu, Xiaoyang and Huang, Yi-Hua and Tao, Xin and Wan, Pengfei and Zhang, Di and Qi, Xiaojuan},
  title     = {How Far are {AI-generated} Videos from Simulating the {3D} Visual World: A Learned {3D} Evaluation Approach},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10307--10317}
}
Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Zhou_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Zhou_2025_ICCV_SalientCamouflaged,
  author    = {Zhou, Zhangjun and Li, Yiping and Zhong, Chunlin and Huang, Jianuo and Pei, Jialun and Li, Hua and Tang, He},
  title     = {Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22372--22382}
}
OccluGaussian: Occlusion-Aware Gaussian Splatting for Large Scene Reconstruction and Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shiyong and Tang, Xiao and Li, Zhihao and He, Yingfan and Ye, Chongjie and Liu, Jianzhuang and Huang, Binxiao and Zhou, Shunbo and Wu, Xiaofei},
  title     = {{OccluGaussian}: Occlusion-Aware {Gaussian} Splatting for Large Scene Reconstruction and Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26643--26652}
}
VisionMath: Vision-Form Mathematical Problem-Solving-
[pdf]
[supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Zongyang and Chen, Yuxin and Zhang, Ziqi and Qi, Zhongang and Yuan, Chunfeng and Zhu, Shaojie and Zhuo, Chengxiang and Li, Bing and Liu, Ye and Li, Zang and Shan, Ying and Hu, Weiming},
  title     = {{VisionMath}: Vision-Form Mathematical Problem-Solving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1162--1172}
}
Unsupervised RGB-D Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency-
[pdf]
[supp]
[bibtex]
@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yejun and Wang, Haocheng and Shen, Lingfeng and Zheng, Qian and Pan, Gang and Cao, Yanlong},
  title     = {Unsupervised {RGB-D} Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24868--24877}
}
CWNet: Causal Wavelet Network for Low-Light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tongshun and Liu, Pingping and Lu, Yubing and Cai, Mengen and Zhang, Zijian and Zhang, Zhe and Zhou, Qiuzhan},
  title     = {{CWNet}: Causal Wavelet Network for Low-Light Image Enhancement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8789--8799}
}
Demeter: A Parametric Model of Crop Plant Morphology from the Real World-
[pdf]
[supp]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Tianhang and Zhai, Albert J. and Chen, Evan Z. and Zhou, Rui and Deng, Yawen and Li, Zitong and Zhao, Kejie and Shiu, Janice and Zhao, Qianyu and Xu, Yide and Wang, Xinlei and Shen, Yuan and Wang, Sheng and Ainsworth, Lisa and Guan, Kaiyu and Wang, Shenlong},
  title     = {Demeter: A Parametric Model of Crop Plant Morphology from the Real World},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28740--28751}
}
VideoLLaMB: Long Streaming Video Understanding with Recurrent Memory Bridges-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Wang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Wang_2025_ICCV_VideoLLaMB,
  author    = {Wang, Yuxuan and Song, Yiqi and Xie, Cihang and Liu, Yang and Zheng, Zilong},
  title     = {{VideoLLaMB}: Long Streaming Video Understanding with Recurrent Memory Bridges},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24170--24181}
}
Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models-
[pdf]
[supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wei and Chen, Kangjie and Qiu, Jiawei and Zhang, Yuyang and Wang, Run and Mao, Jin and Zhang, Tianwei and Wang, Lina},
  title     = {Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18575--18584}
}
CoA-VLA: Improving Vision-Language-Action Models via Visual-Text Chain-of-Affordance-
[pdf]
[supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jinming and Zhu, Yichen and Tang, Zhibin and Wen, Junjie and Zhu, Minjie and Liu, Xiaoyu and Li, Chengmeng and Cheng, Ran and Peng, Yaxin and Peng, Yan and Feng, Feifei},
  title     = {{CoA-VLA}: Improving Vision-Language-Action Models via Visual-Text Chain-of-Affordance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9759--9769}
}
HiERO: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Peirone_2025_ICCV,
  author    = {Peirone, Simone Alberto and Pistilli, Francesca and Averta, Giuseppe},
  title     = {{HiERO}: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19862--19871}
}
FVGen: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Teng_2025_ICCV,
  author    = {Teng, Wenbin and Chen, Gonglin and Chen, Haiwei and Zhao, Yajie},
  title     = {{FVGen}: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26095--26105}
}
ZFusion: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior-
[pdf]
[supp]
[bibtex]
@InProceedings{Esmaeilzehi_2025_ICCV,
  author    = {Esmaeilzehi, Alireza and Zaredar, Hossein and Tian, Yapeng and Seyyed-Kalantari, Laleh},
  title     = {{ZFusion}: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12338--12348}
}
Doodle Your Keypoints: Sketch-Based Few-Shot Keypoint Detection-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Maity_2025_ICCV,
  author    = {Maity, Subhajit and Bhunia, Ayan Kumar and Koley, Subhadeep and Chowdhury, Pinaki Nath and Sain, Aneeshan and Song, Yi-Zhe},
  title     = {Doodle Your Keypoints: Sketch-Based Few-Shot Keypoint Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {284--296}
}
Open-Vocabulary Octree-Graph for 3D Scene Understanding-
[pdf]
[arXiv]
[bibtex]
% Key disambiguated: "Wang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Wang_2025_ICCV_OctreeGraph,
  author    = {Wang, Zhigang and Su, Yifei and Li, Chenhui and Wang, Dong and Huang, Yan and Li, Xuelong and Zhao, Bin},
  title     = {Open-Vocabulary Octree-Graph for {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7037--7047}
}
FlexGen: Flexible Multi-View Generation from Text and Image Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Xu_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Xu_2025_ICCV_FlexGen,
  author    = {Xu, Xinli and Ge, Wenhang and Lin, Jiantao and Feng, Jiawei and Xu, Lie and Zhao, Hanfeng and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{FlexGen}: Flexible Multi-View Generation from Text and Image Inputs},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18714--18724}
}
SummDiff: Generative Modeling of Video Summarization with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Kwanseok and Hahm, Jaehoon and Kim, Sumin and Sul, Jinhwan and Kim, Byunghak and Lee, Joonseok},
  title     = {{SummDiff}: Generative Modeling of Video Summarization with Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15096--15106}
}
FlowDPS: Flow-Driven Posterior Sampling for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Kim_2025_ICCV" is already used by an earlier entry in this file.
% Title: stray space before the colon removed.
@InProceedings{Kim_2025_ICCV_FlowDPS,
  author    = {Kim, Jeongsol and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{FlowDPS}: Flow-Driven Posterior Sampling for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12328--12337}
}
Head2Body: Body Pose Generation from Multi-sensory Head-mounted Inputs-
[pdf]
[supp]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Minh and Mao, Hongda and Chen, Qingshuang and Kim, Yelin},
  title     = {{Head2Body}: Body Pose Generation from Multi-sensory Head-mounted Inputs},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6849--6858}
}
Closed-Loop Transfer for Weakly-supervised Affordance Grounding-
[pdf]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiajin and Wei, Zhengxuan and Zheng, Ge and Yang, Sibei},
  title     = {Closed-Loop Transfer for Weakly-supervised Affordance Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9530--9539}
}
OminiControl: Minimal and Universal Control for Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhenxiong and Liu, Songhua and Yang, Xingyi and Xue, Qiaochu and Wang, Xinchao},
  title     = {{OminiControl}: Minimal and Universal Control for Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14940--14950}
}
Zeroth-Order Fine-Tuning of LLMs in Random Subspaces-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ziming and Zhou, Pan and Wang, Sike and Li, Jia and Tian, Mi and Huang, Hua},
  title     = {Zeroth-Order Fine-Tuning of {LLMs} in Random Subspaces},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4475--4485}
}
G2D: Boosting Multimodal Learning with Gradient-Guided Distillation-
[pdf]
[supp]
[bibtex]
@InProceedings{Rakib_2025_ICCV,
  author    = {Rakib, Mohammed and Bagavathi, Arunkumar},
  title     = {{G2D}: Boosting Multimodal Learning with Gradient-Guided Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4059--4068}
}
AIComposer: Any Style and Content Image Composition via Feature Integration-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Li_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Li_2025_ICCV_AIComposer,
  author    = {Li, Haowen and Fan, Zhenfeng and Wen, Zhang and Zhu, Zhengzhou and Li, Yunjin},
  title     = {{AIComposer}: Any Style and Content Image Composition via Feature Integration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16840--16850}
}
PAN-Crafter: Learning Modality-Consistent Alignment for PAN-Sharpening-
[pdf]
[supp]
[bibtex]
@InProceedings{Do_2025_ICCV,
  author    = {Do, Jeonghyeok and Kim, Sungpyo and Youk, Geunhyuk and Lee, Jaehyup and Kim, Munchurl},
  title     = {{PAN-Crafter}: Learning Modality-Consistent Alignment for {PAN-Sharpening}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4242--4252}
}
M2SFormer: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Nam_2025_ICCV,
  author    = {Nam, Ju-Hyeon and Moon, Dong-Hyun and Lee, Sang-Chul},
  title     = {{M2SFormer}: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15927--15938}
}
Pinco: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guangben and Du, Yuzhen and Tang, Yizhe and Sun, Zhimin and Yi, Ran and Qi, Yifan and Wang, Tianyi and Ma, Lizhuang and Zou, Fangyuan},
  title     = {Pinco: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15266--15276}
}
ReconDreamer++: Harmonizing Generative and Reconstructive Models for Driving Scene Representation-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Guosheng and Wang, Xiaofeng and Ni, Chaojun and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Wang, Xingang},
  title     = {{ReconDreamer++}: Harmonizing Generative and Reconstructive Models for Driving Scene Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26718--26728}
}
SyncDiff: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Wenkun and Liu, Yun and Liu, Ruitao and Yi, Li},
  title     = {{SyncDiff}: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11731--11743}
}
Rethinking Few Shot CLIP Benchmarks: A Critical Analysis in the Inductive Setting-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kravets_2025_ICCV,
  author    = {Kravets, Alexey and Chen, Da and Namboodiri, Vinay P.},
  title     = {Rethinking Few Shot {CLIP} Benchmarks: A Critical Analysis in the Inductive Setting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1902--1911}
}
Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Liu_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Liu_2025_ICCV_MindTheGap,
  author    = {Liu, Yuhan and Fu, Jingwen and Wu, Yang and Wu, Kangyi and Li, Pengna and Wu, Jiayi and Zhou, Sanping and Xin, Jingmin},
  title     = {Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20313--20323}
}
CoStoDet-DDPM: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition-
[pdf]
[supp]
[bibtex]
% Key disambiguated: "Yang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Yang_2025_ICCV_CoStoDet,
  author    = {Yang, Kaixiang and Li, Xin and Li, Qiang and Wang, Zhiwei},
  title     = {{CoStoDet-DDPM}: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23741--23751}
}
GSOT3D: Towards Generic 3D Single Object Tracking in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Jiao_2025_ICCV,
  author    = {Jiao, Yifan and Li, Yunhao and Ding, Junhua and Yang, Qing and Fu, Song and Fan, Heng and Zhang, Libo},
  title     = {{GSOT3D}: Towards Generic {3D} Single Object Tracking in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5469--5478}
}
UnZipLoRA: Separating Content and Style from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Liu_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Liu_2025_ICCV_UnZipLoRA,
  author    = {Liu, Chang and Shah, Viraj and Cui, Aiyu and Lazebnik, Svetlana},
  title     = {{UnZipLoRA}: Separating Content and Style from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16776--16785}
}
What You Have is What You Track: Adaptive and Robust Multimodal Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Tan_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Tan_2025_ICCV_MultimodalTracking,
  author    = {Tan, Yuedong and Shao, Jiawei and Zamfir, Eduard and Li, Ruanjun and An, Zhaochong and Ma, Chao and Paudel, Danda and Van Gool, Luc and Timofte, Radu and Wu, Zongwei},
  title     = {What You Have is What You Track: Adaptive and Robust Multimodal Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3455--3465}
}
RareCLIP: Rarity-aware Online Zero-shot Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]
% Key disambiguated: "He_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{He_2025_ICCV_RareCLIP,
  author    = {He, Jianfang and Cao, Min and Peng, Silong and Xie, Qiong},
  title     = {{RareCLIP}: Rarity-aware Online Zero-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24478--24487}
}
AdaDCP: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization-
[pdf]
[bibtex]
@InProceedings{Bi_2025_ICCV,
  author    = {Bi, Qi and Shen, Yixian and Yi, Jingjun and Xia, Gui-Song},
  title     = {{AdaDCP}: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12997--13008}
}
HERMES: A Unified Self-Driving World Model for Simultaneous 3D Scene Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Zhou_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Zhou_2025_ICCV_HERMES,
  author    = {Zhou, Xin and Liang, Dingkang and Tu, Sifan and Chen, Xiwu and Ding, Yikang and Zhang, Dingyuan and Tan, Feiyang and Zhao, Hengshuang and Bai, Xiang},
  title     = {{HERMES}: A Unified Self-Driving World Model for Simultaneous {3D} Scene Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27817--27827}
}
ArgMatch: Adaptive Refinement Gathering for Efficient Dense Matching-
[pdf]
[supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Yuxin and Zhang, Kaining and Tang, Linfeng and Yang, Jiaqi and Ma, Jiayi},
  title     = {{ArgMatch}: Adaptive Refinement Gathering for Efficient Dense Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27369--27379}
}
Enhancing Image Restoration Transformer via Adaptive Translation Equivariance-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, JiaKui and Yao, Zhengjian and Jin, Lujia and He, Hangzhou and Lu, Yanye},
  title     = {Enhancing Image Restoration Transformer via Adaptive Translation Equivariance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16047--16057}
}
Free4D: Tuning-free 4D Scene Generation with Spatial-Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Liu_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Liu_2025_ICCV_Free4D,
  author    = {Liu, Tianqi and Huang, Zihao and Chen, Zhaoxi and Wang, Guangcong and Hu, Shoukang and Shen, Liao and Sun, Huiqiang and Cao, Zhiguo and Li, Wei and Liu, Ziwei},
  title     = {{Free4D}: Tuning-free {4D} Scene Generation with Spatial-Temporal Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25571--25582}
}
Generative Zoo-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Niewiadomski_2025_ICCV,
  author    = {Niewiadomski, Tomasz and Yiannakidis, Anastasios and Cuevas-Velasquez, Hanz and Sanyal, Soubhik and Black, Michael J. and Zuffi, Silvia and Kulits, Peter},
  title     = {Generative Zoo},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8492--8502}
}
Any-SSR: How Recursive Least Squares Works in Continual Learning of Large Language Model-
[pdf]
[supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Kai and Pan, Kang and Zhang, Xiao and Meng, Erli and He, Run and Cui, Yawen and Guo, Nuoyan and Zhuang, Huiping},
  title     = {{Any-SSR}: How Recursive Least Squares Works in Continual Learning of Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3047--3057}
}
Instruction-Oriented Preference Alignment for Enhancing Multi-Modal Comprehension Capability of MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]
% Key disambiguated: "Wang_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Wang_2025_ICCV_InstructionOriented,
  author    = {Wang, Zitian and Liao, Yue and Rong, Kang and Rao, Fengyun and Yang, Yibo and Liu, Si},
  title     = {Instruction-Oriented Preference Alignment for Enhancing Multi-Modal Comprehension Capability of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2010--2021}
}
RapVerse: Coherent Vocals and Whole-Body Motion Generation from Text-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiaben and Yan, Xin and Chen, Yihang and Cen, Siyuan and Wang, Zixin and Ma, Qinwei and Zhen, Haoyu and Qian, Kaizhi and Lu, Lie and Gan, Chuang},
  title     = {{RapVerse}: Coherent Vocals and Whole-Body Motion Generation from Text},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10097--10107}
}
MoFRR: Mixture of Diffusion Models for Face Retouching Restoration-
[pdf]
[arXiv]
[bibtex]
% Key disambiguated: "Liu_2025_ICCV" is already used by an earlier entry in this file.
@InProceedings{Liu_2025_ICCV_MoFRR,
  author    = {Liu, Jiaxin and Ying, Qichao and Qian, Zhenxing and Li, Sheng and Zhang, Runqi and Liu, Jian and Zhang, Xinpeng},
  title     = {{MoFRR}: Mixture of Diffusion Models for Face Retouching Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12842--12851}
}
SFUOD: Source-Free Unknown Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Keon-Hee and Choe, Seun-An and Park, Gyeong-Moon},
  title     = {{SFUOD}: Source-Free Unknown Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3499--3508}
}
UniEgoMotion: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Chaitanya and Nakamura, Hiroki and Kyuragi, Yuta and Kozuka, Kazuki and Niebles, Juan Carlos and Adeli, Ehsan},
  title     = {{UniEgoMotion}: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10318--10329}
}
ToolVQA: A Dataset for Multi-step Reasoning VQA with External Tools-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Shaofeng and Lei, Ting and Liu, Yang},
  title     = {{ToolVQA}: A Dataset for Multi-step Reasoning {VQA} with External Tools},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4424--4433}
}
Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Brousseau_2025_ICCV,
  author    = {Brousseau, Pierre-Andr{\'e} and Roy, S{\'e}bastien},
  title     = {Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28925--28934},
}
ScenePainter: Semantically Consistent Perpetual 3D Scene Generation with Concept Relation Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Chong and Zhang, Shengjun and Liu, Fangfu and Liu, Chang and Hirunyaratsameewong, Khodchaphun and Duan, Yueqi},
  title     = {{ScenePainter}: Semantically Consistent Perpetual {3D} Scene Generation with Concept Relation Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28808--28817},
}
ESCNet:Edge-Semantic Collaborative Network for Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Sheng and Chen, Xin and Zhang, Yan and Lin, Xianming and Cao, Liujuan},
  title     = {{ESCNet}: Edge-Semantic Collaborative Network for Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20053--20063},
}
PixelStitch: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Hengzhe and Nie, Lang and Lin, Chunyu and Feng, Xiaomei and Zhao, Yao},
  title     = {{PixelStitch}: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28125--28134},
}
SANA-Sprint: One-Step Diffusion with Continuous-Time Consistency Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Junsong and Xue, Shuchen and Zhao, Yuyang and Yu, Jincheng and Paul, Sayak and Chen, Junyu and Cai, Han and Han, Song and Xie, Enze},
  title     = {{SANA-Sprint}: One-Step Diffusion with Continuous-Time Consistency Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16185--16195},
}
Information-Bottleneck Driven Binary Neural Network for Change Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Kaijie and Zhang, Zhiyuan and Kong, Shu and Gao, Tian and Xu, Cheng-Zhong and Kong, Hui},
  title     = {Information-Bottleneck Driven Binary Neural Network for Change Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7176--7186},
}
Erasing More Than Intended? How Concept Erasure Degrades the Generation of Non-Target Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Amara_2025_ICCV,
  author    = {Amara, Ibtihel and Humayun, Ahmed Imtiaz and Kajic, Ivana and Parekh, Zarana and Harris, Natalie and Young, Sarah and Nagpal, Chirag and Kim, Najoung and He, Junfeng and Vasconcelos, Cristina Nader and Ramachandran, Deepak and Farnadi, Golnoosh and Heller, Katherine and Havaei, Mohammad and Rostamzadeh, Negar},
  title     = {Erasing More Than Intended? How Concept Erasure Degrades the Generation of Non-Target Concepts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16420--16430},
}
Global and Local Entailment Learning for Natural World Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sastry_2025_ICCV,
  author    = {Sastry, Srikumar and Dhakal, Aayush and Xing, Eric and Khanal, Subash and Jacobs, Nathan},
  title     = {Global and Local Entailment Learning for Natural World Imagery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15770--15780},
}
Ross3D: Reconstructive Visual Instruction Tuning with 3D-Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haochen and Zhao, Yucheng and Wang, Tiancai and Fan, Haoqiang and Zhang, Xiangyu and Zhang, Zhaoxiang},
  title     = {{Ross3D}: Reconstructive Visual Instruction Tuning with {3D}-Awareness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9275--9286},
}
LVFace: Progressive Cluster Optimization for Large Vision Models in Face Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{You_2025_ICCV,
  author    = {You, Jinghan and Li, Shanglin and Sun, Yuanrui and Wei, Jiangchuan and Guo, Mingyu and Feng, Chao and Ran, Jiao},
  title     = {{LVFace}: Progressive Cluster Optimization for Large Vision Models in Face Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11840--11849},
}
Dataset Ownership Verification for Pre-trained Masked Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Yuechen and Song, Jie and Shan, Yicheng and Zhang, Xiaoyan and Wan, Yuanyu and Zhang, Shengxuming and Duan, Jiarui and Song, Mingli},
  title     = {Dataset Ownership Verification for Pre-trained Masked Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3132--3142},
}
VLR-Driver: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Fanjie and Li, Yitong and Chen, Weihuang and Min, Chen and Li, Yizhe and Gao, Zhiqiang and Li, Haoyang and Guo, Zhongyu and Sun, Hongbin},
  title     = {{VLR-Driver}: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26966--26976},
}
ResGS: Residual Densification of 3D Gaussian for Efficient Detail Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Yanzhe and Cheng, Kai and Kang, Xin and Chen, Xuejin},
  title     = {{ResGS}: Residual Densification of {3D} {Gaussian} for Efficient Detail Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28093--28102},
}
Language Driven Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Zhu and Pang, Bowen and Liu, Lizhe and Zhang, Runmin and Li, Qiang and Cao, Si-Yuan and Luo, Maochun and Chen, Mingxia and Yang, Sheng and Shen, Hui-Liang},
  title     = {Language Driven Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7548--7558},
}
Prior2Former - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt_2025_ICCV,
  author    = {Schmidt, Sebastian and Koerner, Julius and Fuchsgruber, Dominik and Gasperini, Stefano and Tombari, Federico and G{\"u}nnemann, Stephan},
  title     = {{Prior2Former} - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23646--23656},
}
E-NeMF: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yan and Chen, Zehao and Yan, Haojie and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{E-NeMF}: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10854--10864},
}
Event-based Tiny Object Detection: A Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Nuo and Xiao, Chao and Dai, Yimian and He, Shiman and Li, Miao and An, Wei},
  title     = {Event-based Tiny Object Detection: A Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7209--7218},
}
Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yang and Lu, Wang and Ji, Jie and Ye, Ruimeng and Li, Gen and Ma, Xiaolong and Hui, Bo},
  title     = {Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20445--20455},
}
Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yanzuo and Ren, Yuxi and Xia, Xin and Lin, Shanchuan and Wang, Xing and Xiao, Xuefeng and Ma, Andy J. and Xie, Xiaohua and Lai, Jian-Huang},
  title     = {Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16818--16829},
}
Switch-a-View: View Selection Learned from Unlabeled In-the-wild Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Majumder_2025_ICCV,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Grauman, Kristen},
  title     = {{Switch-a-View}: View Selection Learned from Unlabeled In-the-wild Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11969--11979},
}
E-SAM: Training-Free Segment Every Entity Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weiming and Xiao, Dingwen and Chen, Lei and Wang, Lin},
  title     = {{E-SAM}: Training-Free Segment Every Entity Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24688--24697},
}
ViewSRD: 3D Visual Grounding via Structured Multi-View Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ronggang and Yang, Haoxin and Cai, Yan and Xu, Xuemiao and Zhang, Huaidong and He, Shengfeng},
  title     = {{ViewSRD}: {3D} Visual Grounding via Structured Multi-View Decomposition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9726--9736},
}
UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Junjie and Geng, Yifeng and Bo, Liefeng},
  title     = {{UniPortrait}: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14399--14408},
}
FDPT: Federated Discrete Prompt Tuning for Black-Box Visual-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiaqi and Chen, Simin and Tang, Jing and Yang, Yuzhe and Chen, Yiming and Wang, Lixu and Lin, Song and Wang, Zehua and Chen, Wei and Tian, Zijian},
  title     = {{FDPT}: Federated Discrete Prompt Tuning for Black-Box Visual-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2461--2470},
}
Exploiting Diffusion Prior for Task-driven Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu},
  title     = {Exploiting Diffusion Prior for Task-driven Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10151--10161},
}
CE-FAM: Concept-Based Explanation via Fusion of Activation Maps-
[pdf]
[bibtex]@InProceedings{Kuroki_2025_ICCV,
  author    = {Kuroki, Michihiro and Yamasaki, Toshihiko},
  title     = {{CE-FAM}: Concept-Based Explanation via Fusion of Activation Maps},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1413--1422},
}
Dual-level Prototype Learning for Composite Degraded Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhongze and Zhao, Haitao and Yao, Lujian and Peng, Jingchao and Zhao, Kaijie},
  title     = {Dual-level Prototype Learning for Composite Degraded Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14006--14016},
}
Forensic-MoE: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Mingqi and Li, Ziguang and Yu, Lingyun and Yang, Quanwei and Xie, Hongtao and Zhang, Yongdong},
  title     = {{Forensic-MoE}: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17772--17782},
}
Robust Adverse Weather Removal via Spectral-based Spatial Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Yuhwan and Yang, Yunseo and Yoon, Youngho and Yoon, Kuk-Jin},
  title     = {Robust Adverse Weather Removal via Spectral-based Spatial Grouping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11872--11883},
}
PLAN: Proactive Low-Rank Allocation for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiequn and Zhuang, Zhan and Zhang, Yu},
  title     = {{PLAN}: Proactive Low-Rank Allocation for Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2909--2918},
}
EMoTive: Event-guided Trajectory Modeling for 3D Motion Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Zengyu and Zhai, Wei and Cao, Yang and Zha, Zhengjun},
  title     = {{EMoTive}: Event-guided Trajectory Modeling for {3D} Motion Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9342--9351},
}
RobuSTereo: Robust Zero-Shot Stereo Matching under Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuran and Liang, Yingping and Hu, Yutao and Fu, Ying},
  title     = {{RobuSTereo}: Robust Zero-Shot Stereo Matching under Adverse Weather},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25134--25144},
}
SemGes: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lanmiao and Ghaleb, Esam and Ozyurek, Asli and Yumak, Zerrin},
  title     = {{SemGes}: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13963--13973},
}
From Sharp to Blur: Unsupervised Domain Adaptation for 2D Human Pose Estimation Under Extreme Motion Blur Using Event Cameras-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Youngho and Cho, Hoonhee and Yoon, Kuk-Jin},
  title     = {From Sharp to Blur: Unsupervised Domain Adaptation for {2D} Human Pose Estimation Under Extreme Motion Blur Using Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9406--9417},
}
HyPiDecoder: Hybrid Pixel Decoder for Efficient Segmentation and Detection-
[pdf]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Fengzhe and Shi, Humphrey},
  title     = {{HyPiDecoder}: Hybrid Pixel Decoder for Efficient Segmentation and Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22100--22109},
}
DDB: Diffusion Driven Balancing to Address Spurious Correlations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parast_2025_ICCV,
  author    = {Parast, Aryan Yazdan and Azam, Basim and Akhtar, Naveed},
  title     = {{DDB}: Diffusion Driven Balancing to Address Spurious Correlations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17526--17535},
}
Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Sun, Mingjie and Dong, Yi and Xiao, Jimin},
  title     = {Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20291--20300},
}
Motal: Unsupervised 3D Object Detection by Modality and Task-specific Knowledge Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Hai and Lin, Hongwei and Guo, Xusheng and Li, Xin and Wang, Mingming and Wang, Cheng and Wen, Chenglu},
  title     = {Motal: Unsupervised {3D} Object Detection by Modality and Task-specific Knowledge Transfer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6284--6293},
}
MMAIF: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Zihan and Zhong, Yu and Wang, Ziqi and Deng, Liang-Jian},
  title     = {{MMAIF}: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11744--11754},
}
SparseLaneSTP: Leveraging Spatio-Temporal Priors with Sparse Transformers for 3D Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Pittner_2025_ICCV,
  author    = {Pittner, Maximilian and Janai, Joel and Faigle, Mario and Condurache, Alexandru Paul},
  title     = {{SparseLaneSTP}: Leveraging Spatio-Temporal Priors with Sparse Transformers for {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29099--29109},
}
MonSTeR: a Unified Model for Motion, Scene, Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Collorone_2025_ICCV,
  author    = {Collorone, Luca and Gioia, Matteo and Pappa, Massimiliano and Leoni, Paolo and Ficarra, Giovanni and Litany, Or and Spinelli, Indro and Galasso, Fabio},
  title     = {{MonSTeR}: a Unified Model for Motion, Scene, Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10940--10949},
}
Semantic Causality-Aware Vision-Based 3D Occupancy Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Dubing and Zheng, Huan and Zhou, Yucheng and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Semantic Causality-Aware Vision-Based {3D} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24878--24888},
}
OmniSAM: Omnidirectional Segment Anything Model for UDA in Panoramic Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Ding and Zheng, Xu and Liao, Chenfei and Lyu, Yuanhuiyi and Chen, Jialei and Wu, Shengyang and Zhang, Linfeng and Hu, Xuming},
  title     = {{OmniSAM}: Omnidirectional Segment Anything Model for {UDA} in Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23892--23901},
}
Event-Driven Storytelling with Multiple Lifelike Humans in a 3D Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Donggeun and Bae, Jinseok and Hwang, Inwoo and Lee, Seungmin and Lee, Hwanhee and Kim, Young Min},
  title     = {Event-Driven Storytelling with Multiple Lifelike Humans in a {3D} Scene},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11654--11664},
}
Probabilistic Inertial Poser (ProbIP): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min and Jeon, Younho and Jo, Sungho},
  title     = {Probabilistic Inertial Poser ({ProbIP}): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25893--25902},
}
Bi-Level Optimization for Self-Supervised AI-Generated Face Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Mian and Zhong, Nan and Yu, Baosheng and Zhan, Yibing and Ma, Kede},
  title     = {Bi-Level Optimization for Self-Supervised {AI}-Generated Face Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18959--18968},
}
RIOcc: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Baojie and Li, Xiaotian and Zhou, Yuhan and Jiang, Yuyu and Tian, Jiandong and Fan, Huijie},
  title     = {{RIOcc}: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25851--25861},
}
Golden Noise for Diffusion Models: A Learning Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zikai and Shao, Shitong and Bai, Lichen and Zhang, Shufei and Xu, Zhiqiang and Han, Bo and Xie, Zeke},
  title     = {Golden Noise for Diffusion Models: A Learning Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17688--17697},
}
CharaConsist: Fine-Grained Consistent Character Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengyu and Ding, Henghui and Peng, Jianing and Zhao, Yao and Chen, Yunpeng and Wei, Yunchao},
  title     = {{CharaConsist}: Fine-Grained Consistent Character Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16058--16067},
}
RetinexMCNet: A Memory Controller Dominated Network for Low-Light Video Enhancement Based on Retinex-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Meiao and Kang, Xuejing and Lu, Yaxi and Xu, Jie},
  title     = {{RetinexMCNet}: A Memory Controller Dominated Network for Low-Light Video Enhancement Based on {Retinex}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9716--9725},
}
Learnable Fractional Reaction-Diffusion Dynamics for Under-Display ToF Imaging and Beyond-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2025_ICCV,
  author    = {Qiao, Xin and Poggi, Matteo and Wei, Xing and Deng, Pengchao and Zhou, Yanhui and Mattoccia, Stefano},
  title     = {Learnable Fractional Reaction-Diffusion Dynamics for Under-Display {ToF} Imaging and Beyond},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6080--6090},
}
Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model SAIC: Dataset Construction, Methodology, and Application-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruiyun and Guo, Bingyang and Li, Haoyuan},
  title     = {Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model {SAIC}: Dataset Construction, Methodology, and Application},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22563--22574},
}
Refer to Any Segmentation Mask Group With Vision-Language Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Shengcao and Wei, Zijun and Kuen, Jason and Liu, Kangning and Zhang, Lingzhi and Gu, Jiuxiang and Jung, HyunJoon and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Refer to Any Segmentation Mask Group With Vision-Language Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21853--21863},
}
Integrating Visual Interpretation and Linguistic Reasoning for Geometric Problem Solving-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Zixian and Liu, Ming and Wang, Qilong and Ji, Zhilong and Bai, Jinfeng and Zhang, Lei and Zuo, Wangmeng},
  title     = {Integrating Visual Interpretation and Linguistic Reasoning for Geometric Problem Solving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3988--3998},
}
Dual-Rate Dynamic Teacher for Source-Free Domain Adaptive Object Detection-
[pdf]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Qi and Wu, Xiao and He, Jun-Yan and Li, Shuai},
  title     = {Dual-Rate Dynamic Teacher for Source-Free Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2067--2076},
}
How Do Multimodal Large Language Models Handle Complex Multimodal Reasoning? Placing Them in An Extensible Escape Game-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyue and Dong, Yurui and Luo, Fuwen and Ruan, Minyuan and Cheng, Zhili and Chen, Chi and Li, Peng and Liu, Yang},
  title     = {How Do Multimodal Large Language Models Handle Complex Multimodal Reasoning? Placing Them in An Extensible Escape Game},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4807--4817},
}
UST-SSM: Unified Spatio-Temporal State Space Models for Point Cloud Video Modeling-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Peiming and Wang, Ziyi and Yuan, Yulin and Liu, Hong and Meng, Xiangming and Yuan, Junsong and Liu, Mengyuan},
  title     = {{UST-SSM}: Unified Spatio-Temporal State Space Models for Point Cloud Video Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6738--6747},
}
Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Renzhi and Zhou, Haowen and Chen, Yubei and Xue, Yi},
  title     = {Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27771--27782},
}
PropVG: End-to-End Proposal-Driven Visual Grounding with Multi-Granularity Discrimination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ming and Cheng, Wenxuan and Zhuang, Jiedong and Liu, Jiang-jiang and Zhao, Hongshen and Feng, Zhenhua and Yang, Wankou},
  title     = {{PropVG}: End-to-End Proposal-Driven Visual Grounding with Multi-Granularity Discrimination},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7058--7068},
}
StyleSRN: Scene Text Image Super-Resolution with Text Style Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Shengrong and Wang, Runmin and Hao, Ke and Ma, Xuqi and Gao, Changxin and Liu, Li and Sang, Nong},
  title     = {{StyleSRN}: Scene Text Image Super-Resolution with Text Style Embedding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18693--18702},
}
SpatialSplat: Efficient Semantic 3D from Sparse Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2025_ICCV,
  author    = {Sheng, Yu and Deng, Jiajun and Zhang, Xinran and Zhang, Yu and Hua, Bei and Zhang, Yanyong and Ji, Jianmin},
  title     = {{SpatialSplat}: Efficient Semantic {3D} from Sparse Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26404--26414},
}
Learning Visual Proxy for Compositional Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiyu and Yan, Cheng and Liu, Yang and Jing, Chenchen and Zhou, Lei and Wang, Wenjun},
  title     = {Learning Visual Proxy for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2793--2802},
}
Processing and acquisition traces in visual encoders: What does CLIP know about your camera?-
[pdf]
[supp]
[bibtex]@InProceedings{Ramos_2025_ICCV,
  author    = {Ramos, Ryan and Stojni{\'c}, Vladan and Kordopatis-Zilos, Giorgos and Nakashima, Yuta and Tolias, Giorgos and Garcia, Noa},
  title     = {Processing and acquisition traces in visual encoders: What does {CLIP} know about your camera?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17056--17066},
}
Holistic Tokenizer for Autoregressive Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Anlin and Wang, Haochen and Zhao, Yucheng and Deng, Weipeng and Wang, Tiancai and Zhang, Xiangyu and Qi, Xiaojuan},
  title     = {Holistic Tokenizer for Autoregressive Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16916--16926},
}
ConsistentCity: Semantic Flow-guided Occupancy DiT for Temporally Consistent Driving Scene Synthesis-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Benjin and Wang, Xiaogang and Li, Hongsheng},
  title     = {{ConsistentCity}: Semantic Flow-guided Occupancy {DiT} for Temporally Consistent Driving Scene Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26382--26392},
}
G2PDiffusion: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mengdi and Gao, Zhangyang and Chang, Hong and Li, Stan Z. and Shan, Shiguang and Chen, Xilin},
  title     = {{G2PDiffusion}: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20705--20714},
}
PathFinder: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghezloo_2025_ICCV,
  author    = {Ghezloo, Fatemeh and Seyfioglu, Mehmet Saygin and Soraki, Rustin and Ikezogwo, Wisdom O. and Li, Beibin and Vivekanandan, Tejoram and Elmore, Joann G. and Krishna, Ranjay and Shapiro, Linda},
  title     = {{PathFinder}: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23431--23441},
}
VLABench: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiduo and Xu, Zhe and Liu, Peiju and Yu, Xiaopeng and Li, Yuan and Gao, Qinghui and Fei, Zhaoye and Yin, Zhangyue and Wu, Zuxuan and Jiang, Yu-Gang and Qiu, Xipeng},
  title     = {{VLABench}: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11142--11152},
}
MolParser: End-to-end Visual Recognition of Molecule Structures in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xi and Wang, Jiankun and Cai, Xiaochen and Chen, Shangqian and Yang, Shuwen and Tao, Haoyi and Wang, Nan and Yao, Lin and Zhang, Linfeng and Ke, Guolin},
  title     = {{MolParser}: End-to-end Visual Recognition of Molecule Structures in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24528--24538},
}
UPRE: Zero-Shot Domain Adaptation for Object Detection via Unified Prompt and Representation Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao and Wei, Fei and Wang, Yong and Zhao, Wenda and Li, Feiyi and Chu, Xiangxiang},
  title     = {{UPRE}: Zero-Shot Domain Adaptation for Object Detection via Unified Prompt and Representation Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {508--518},
}
Heavy Labels Out! Dataset Distillation with Label Space Lightening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruonan and Liu, Songhua and Chen, Zigeng and Ye, Jingwen and Wang, Xinchao},
  title     = {Heavy Labels Out! {Dataset} Distillation with Label Space Lightening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5017--5026},
}
VideoMiner: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xinye and Guo, Hongcan and Qian, Jiawen and Nan, Guoshun and Wang, Chao and Pan, Yuqi and Hou, Tianhao and Wang, Xiaojuan and Gao, Yutong},
  title     = {{VideoMiner}: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23773--23783},
}
Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jiahua and Yin, Hui and Liang, Wenqi and Zhao, Hanbin and Ding, Henghui and Sebe, Nicu and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11829--11839},
}
FROSS: Faster-Than-Real-Time Online 3D Semantic Scene Graph Generation from RGB-D Images-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Hao-Yu and Lee, Chun-Yi and Sonogashira, Motoharu and Kawanishi, Yasutomo},
  title     = {{FROSS}: Faster-Than-Real-Time Online {3D} Semantic Scene Graph Generation from {RGB-D} Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28818--28827},
}
HVPUNet: Hybrid-Voxel Point-cloud Upsampling Network-
[pdf]
[bibtex]@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Juhyung and Vats, Vibhas Kumar and Jung, Soon-heung and Reza, Alimoor and Crandall, David J.},
  title     = {{HVPUNet}: Hybrid-Voxel Point-cloud Upsampling Network},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29153--29162},
}
MorphoGen: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Tianfang and Zhou, Hongyang and Li, Anan},
  title     = {{MorphoGen}: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13021--13031},
}
REDUCIO! Generating 1K Video within 16 Seconds using Extremely Compressed Motion Latents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Rui and Dai, Qi and Bao, Jianmin and Qiu, Kai and Yang, Yifan and Luo, Chong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{REDUCIO}! {Generating} {1K} Video within 16 Seconds using Extremely Compressed Motion Latents},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19237--19247},
}
EgoAgent: A Joint Predictive Agent Model in Egocentric Worlds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Lu and Wang, Yizhou and Tang, Shixiang and Ma, Qianhong and He, Tong and Ouyang, Wanli and Zhou, Xiaowei and Bao, Hujun and Peng, Sida},
  title     = {{EgoAgent}: A Joint Predictive Agent Model in Egocentric Worlds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6970--6980},
}
Growing a Twig to Accelerate Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Zhenwei and Wang, Mingyang and Yu, Zhou and Pan, Wenwen and Yang, Yan and Wei, Tao and Zhang, Hongyuan and Mao, Ning and Chen, Wei and Yu, Jun},
  title     = {Growing a Twig to Accelerate Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20064--20074},
}
Is Less More? Exploring Token Condensation as Training-free Test-time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zixin and Gong, Dong and Wang, Sen and Huang, Zi and Luo, Yadan},
  title     = {Is Less More? {Exploring} Token Condensation as Training-free Test-time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {144--154},
}
Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wanchang and Zhang, Qing and Zheng, Rongjia and Zheng, Wei-Shi},
  title     = {Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11675--11684},
}
Controllable Latent Space Augmentation for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boutaj_2025_ICCV,
  author    = {Boutaj, Sofi{\`e}ne and Scalbert, Marin and Marza, Pierre and Couzinie-Devy, Florent and Vakalopoulou, Maria and Christodoulidis, Stergios},
  title     = {Controllable Latent Space Augmentation for Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22165--22174},
}
Global Motion Corresponder for 3D Point-Based Scene Interpolation under Large Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Junru and Vashist, Chirag and Uy, Mikaela Angelina and Stearns, Colton and Luo, Xuan and Guibas, Leonidas and Li, Ke},
  title     = {Global Motion Corresponder for {3D} Point-Based Scene Interpolation under Large Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7884--7893},
}
WINS: Winograd Structured Pruning for Fast Winograd Convolution-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Cheonjun and Oh, Hyun Jae and Park, Mincheol and Moon, Hyunchan and Kim, Minsik and Kim, Suhyun and Yoon, Myung Kuk and Ro, Won Woo},
  title     = {{WINS}: {Winograd} Structured Pruning for Fast {Winograd} Convolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22477--22487},
}
Revelio: Interpreting and leveraging semantic information in diffusion models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dahye and Thomas, Xavier and Ghadiyaram, Deepti},
  title     = {Revelio: Interpreting and leveraging semantic information in diffusion models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4659--4669},
}
UniRes: Universal Image Restoration for Complex Degradations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Mo and Ye, Keren and Delbracio, Mauricio and Milanfar, Peyman and Patel, Vishal M. and Talebi, Hossein},
  title     = {{UniRes}: Universal Image Restoration for Complex Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13237--13247},
}
RayGaussX: Accelerating Gaussian-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Blanc_2025_ICCV,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {{RayGaussX}: Accelerating {Gaussian-Based} Ray Marching for Real-Time and High-Quality Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27575--27584},
}
CoopTrack: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Jiaru and Wang, Jiahao and Xu, Jiahui and Li, Xiaofan and Nie, Zaiqing and Yu, Haibao},
  title     = {{CoopTrack}: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26954--26965},
}
ContraGS: Codebook-Condensed and Trainable Gaussian Splatting for Fast, Memory-Efficient Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Durvasula_2025_ICCV,
  author    = {Durvasula, Sankeerth and Muhunthan, Sharanshangar and Moustafa, Zain and Chen, Richard and Liang, Ruofan and Guan, Yushi and Ahuja, Nilesh and Jain, Nilesh and Panneer, Selvakumar and Vijaykumar, Nandita},
  title     = {{ContraGS}: Codebook-Condensed and Trainable {Gaussian} Splatting for Fast, Memory-Efficient Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28935--28945},
}
NATRA: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Rongqing and Li, Changsheng and Lv, Ruilin and Li, Yuhang and Gao, Yang and Zhang, Xiaolu and Zhou, Jun},
  title     = {{NATRA}: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27872--27884},
}
Your Text Encoder Can Be An Object-Level Watermarking Controller-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Devulapally_2025_ICCV,
  author    = {Devulapally, Naresh Kumar and Huang, Mingzhen and Asnani, Vishal and Agarwal, Shruti and Lyu, Siwei and Lokhande, Vishnu Suresh},
  title     = {Your Text Encoder Can Be An Object-Level Watermarking Controller},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16576--16585},
}
Spatial-Temporal Aware Visuomotor Diffusion Policy Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zhenyang and Wang, Yikai and Wang, Kuanning and Liang, Longfei and Xue, Xiangyang and Fu, Yanwei},
  title     = {Spatial-Temporal Aware Visuomotor Diffusion Policy Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7122--7131},
}
ModalTune: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramanathan_2025_ICCV,
  author    = {Ramanathan, Vishwesh and Xu, Tony and Pati, Pushpak and Ahmed, Faruk and Goubran, Maged and Martel, Anne L.},
  title     = {{ModalTune}: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23912--23923},
}
Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yuheng and Dong, Minjing and Xu, Chang},
  title     = {Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23487--23497},
}
FixTalk: Taming Identity Leakage for High-Quality Talking Head Generation in Extreme Cases-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Shuai and Gong, Bill and Ji, Bin and Pan, Ye},
  title     = {{FixTalk}: Taming Identity Leakage for High-Quality Talking Head Generation in Extreme Cases},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24--36},
}
MinCD-PnP: Learning 2D-3D Correspondences with Approximate Blind PnP-
[pdf]
[supp]
[bibtex]@InProceedings{An_2025_ICCV,
  author    = {An, Pei and Yang, Jiaqi and Peng, Muyao and Yang, You and Liu, Qiong and Wu, Xiaolin and Nan, Liangliang},
  title     = {{MinCD-PnP}: Learning {2D-3D} Correspondences with Approximate Blind {PnP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26519--26528},
}
Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yunhao and Jiao, Yifan and Meng, Dan and Fan, Heng and Zhang, Libo},
  title     = {Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14390--14398},
}
Hallucinatory Image Tokens: A Training-free EAZY Approach to Detecting and Mitigating Object Hallucinations in LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Che_2025_ICCV,
  author    = {Che, Liwei and Liu, Tony Qingze and Jia, Jing and Qin, Weiyi and Tang, Ruixiang and Pavlovic, Vladimir},
  title     = {Hallucinatory Image Tokens: A Training-free {EAZY} Approach to Detecting and Mitigating Object Hallucinations in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21635--21644},
}
CVPT: Cross Visual Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Lingyun and Mao, Jianxu and Yi, Junfei and Tao, Ziming and Wang, Yaonan},
  title     = {{CVPT}: Cross Visual Prompt Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {848--858},
}
On the Robustness Tradeoff in Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Kunyang and Ferrand, Jean-Charles Noirot and Sheatsley, Ryan and Hoak, Blaine and Beugin, Yohan and Pauley, Eric and McDaniel, Patrick},
  title     = {On the Robustness Tradeoff in Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4898--4907},
}
Lay2Story: Extending Diffusion Transformers for Layout-Togglable Story Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Ao and Feng, Jiasong and Cao, Ke and Wang, Jing and Wang, Yun and Zhang, Quanwei and Zhang, Zhanjie},
  title     = {{Lay2Story}: Extending Diffusion Transformers for Layout-Togglable Story Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16102--16111},
}
Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shijie and Gan, Hongping},
  title     = {Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13651--13661},
}
DeepMesh: Auto-Regressive Artist-mesh Creation with Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Ruowen and Ye, Junliang and Wang, Zhengyi and Liu, Guangce and Chen, Yiwen and Wang, Yikai and Zhu, Jun},
  title     = {{DeepMesh}: Auto-Regressive Artist-mesh Creation with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10612--10623},
}
VisRL: Intention-Driven Visual Perception via Reinforced Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhangquan and Luo, Xufang and Li, Dongsheng},
  title     = {{VisRL}: Intention-Driven Visual Perception via Reinforced Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2545--2555},
}
PINO: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ota_2025_ICCV,
  author    = {Ota, Sakuya and Yu, Qing and Fujiwara, Kent and Ikehata, Satoshi and Sato, Ikuro},
  title     = {{PINO}: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10676--10685},
}
MagicDrive-V2: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Ruiyuan and Chen, Kai and Xiao, Bo and Hong, Lanqing and Li, Zhenguo and Xu, Qiang},
  title     = {{MagicDrive-V2}: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28135--28144},
}
Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse 3D Gaussian Sharing-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Tianyu and Zhou, Xiaobo and Hu, Wenkai and Xie, Qi and Ke, Zhihui and Qiu, Tie},
  title     = {Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse {3D} {Gaussian} Sharing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28622--28631},
}
Category-Specific Selective Feature Enhancement for Long-Tailed Multi-Label Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Ruiqi and Tang, Xu and Zhang, Xiangrong and Ma, Jingjing},
  title     = {Category-Specific Selective Feature Enhancement for Long-Tailed Multi-Label Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3757--3766},
}
Sparse-Dense Side-Tuner for efficient Video Temporal Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Pujol-Perich_2025_ICCV,
  author    = {Pujol-Perich, David and Escalera, Sergio and Clap{\'e}s, Albert},
  title     = {Sparse-Dense Side-Tuner for efficient Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21515--21524},
}
Corvid: Improving Multimodal Large Language Models Towards Chain-of-Thought Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Jingjing and Ma, Chao and Song, Xurui and Zhang, Hanwang and Luo, Jun},
  title     = {Corvid: Improving Multimodal Large Language Models Towards Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3034--3046},
}
CompCap: Improving Multimodal Large Language Models with Composite Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaohui and Shukla, Satya Narayan and Azab, Mahmoud and Singh, Aashu and Wang, Qifan and Yang, David and Peng, ShengYun and Yu, Hanchao and Yan, Shen and Zhang, Xuewen and He, Baosheng},
  title     = {{CompCap}: Improving Multimodal Large Language Models with Composite Captions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23582--23592},
}
COSMO: Combination of Selective Memorization for Low-cost Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Siqi and Qiao, Yanyuan and Wang, Qunbo and Yan, Zike and Wu, Qi and Wei, Zhihua and Liu, Jing},
  title     = {{COSMO}: Combination of Selective Memorization for Low-cost Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5511--5522},
}
STI-Bench: Are MLLMs Ready for Precise Spatial-Temporal World Understanding?-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yun and Zhang, Yiming and Lin, Tao and Liu, Xiangrui and Cai, Wenxiao and Liu, Zheng and Zhao, Bo},
  title     = {{STI-Bench}: Are {MLLMs} Ready for Precise Spatial-Temporal World Understanding?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5622--5632},
}
Describe, Adapt and Combine: Empowering CLIP Encoders for Open-set 3D Object Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhichuan and Zhou, Yang and Liu, Zhe and Yu, Rui and Bai, Song and Wang, Yulong and He, Xinwei and Bai, Xiang},
  title     = {Describe, Adapt and Combine: Empowering {CLIP} Encoders for Open-set {3D} Object Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21026--21036},
}
Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galliena_2025_ICCV,
  author    = {Galliena, Tommaso and Apicella, Tommaso and Rosa, Stefano and Morerio, Pietro and Del Bue, Alessio and Natale, Lorenzo},
  title     = {Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24370--24379},
}
CAD-Assistant: Tool-Augmented VLLMs as Generic CAD Task Solvers-
[pdf]
[supp]
[bibtex]@InProceedings{Mallis_2025_ICCV,
  author    = {Mallis, Dimitrios and Karadeniz, Ahmet Serda and Cavada, Sebastian and Rukhovich, Danila and Foteinopoulou, Niki and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila},
  title     = {{CAD-Assistant}: Tool-Augmented {VLLMs} as Generic {CAD} Task Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7284--7294},
}
Fast Image Super-Resolution via Consistency Rectified Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiaqi and Li, Wenbo and Sun, Haoze and Li, Fan and Wang, Zhixin and Peng, Long and Ren, Jingjing and Yang, Haoran and Hu, Xiaowei and Pei, Renjing and Heng, Pheng-Ann},
  title     = {Fast Image Super-Resolution via Consistency Rectified Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11755--11765},
}
Adapt Foundational Segmentation Models with Heterogeneous Searching Space-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Li and Hu, Jie and Zhang, Songan and Jiang, Guannan},
  title     = {Adapt Foundational Segmentation Models with Heterogeneous Searching Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23364--23373},
}
Adversarial Exploitation of Data Diversity Improves Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Sihang and Tan, Siqi and Chang, Bowen and Zhang, Jing and Feng, Chen and Li, Yiming},
  title     = {Adversarial Exploitation of Data Diversity Improves Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26848--26858},
}
Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Delong and Huang, Qiwei and Sun, Yang and Liu, Yuanliu and Zheng, Wei-Shi and Xiong, Pengfei and Zhang, Wei},
  title     = {Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18736--18745},
}
BoxDreamer: Dreaming Box Corners for Generalizable Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yuanhong and He, Xingyi and Zhao, Chen and Yu, Junhao and Yang, Jiaqi and Hu, Ruizhen and Shen, Yujun and Zhu, Xing and Zhou, Xiaowei and Peng, Sida},
  title     = {{BoxDreamer}: Dreaming Box Corners for Generalizable Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9374--9384},
}
LawDIS: Language-Window-based Controllable Dichotomous Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Xinyu and Sun, Meijun and Ji, Ge-Peng and Khan, Fahad Shahbaz and Khan, Salman and Fan, Deng-Ping},
  title     = {{LawDIS}: Language-Window-based Controllable Dichotomous Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23902--23911},
}
3DGraphLLM: Combining Semantic Graphs and Large Language Models for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zemskova_2025_ICCV,
  author    = {Zemskova, Tatiana and Yudin, Dmitry},
  title     = {{3DGraphLLM}: Combining Semantic Graphs and Large Language Models for {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8885--8895},
}
FineMotion: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Bizhu and Xie, Jinheng and Ding, Meidan and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin},
  title     = {{FineMotion}: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13837--13846},
}
MC-Bench: A Benchmark for Multi-Context Visual Grounding in the Era of MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yunqiu and Zhu, Linchao and Yang, Yi},
  title     = {{MC-Bench}: A Benchmark for Multi-Context Visual Grounding in the Era of {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17675--17687},
}
What Makes for Text to 360-degree Panorama Generation with Stable Diffusion?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Jinhong and Zhang, Chang-Bin and Zhang, Qiang and Zhang, Jing},
  title     = {What Makes for Text to 360-degree Panorama Generation with {Stable Diffusion}?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16555--16564},
}
CHORDS: Diffusion Sampling Accelerator with Multi-core Hierarchical ODE Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV,
  author    = {Han, Jiaqi and Ye, Haotian and Li, Puheng and Xu, Minkai and Zou, James and Ermon, Stefano},
  title     = {{CHORDS}: Diffusion Sampling Accelerator with Multi-core Hierarchical {ODE} Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19386--19395},
}
HERMES: temporal-coHERent long-forM understanding with Episodes and Semantics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Faure_2025_ICCV,
  author    = {Faure, Gueter Josmy and Yeh, Jia-Fong and Chen, Min-Hung and Su, Hung-Ting and Lai, Shang-Hong and Hsu, Winston H.},
  title     = {{HERMES}: {temporal-coHERent} {long-forM} understanding with {Episodes} and {Semantics}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22911--22921},
}
CoralSRT: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Ziqiang and Wong, Yuk-Kwan and Hua, Binh-Son and Shi, Jianbo and Yeung, Sai-Kit},
  title     = {{CoralSRT}: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19967--19977},
}
VGMamba: Attribute-to-Location Clue Reasoning for Quantity-Agnostic 3D Visual Grounding-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yihang and Zhang, Jinhao and Wang, Yuxuan and Wu, Aming and Deng, Cheng},
  title     = {{VGMamba}: Attribute-to-Location Clue Reasoning for Quantity-Agnostic {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5295--5304},
}
Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Qingyu and Wu, Jianzong and Bai, Jinbin and Zhang, Jiangning and Qi, Lu and Tong, Yunhai and Li, Xiangtai},
  title     = {Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10995-11005}
}
MaskControl: Spatio-Temporal Control for Masked Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pinyoanuntapong_2025_ICCV,
  author    = {Pinyoanuntapong, Ekkasit and Saleem, Muhammad and Karunratanakul, Korrawe and Wang, Pu and Xue, Hongfei and Chen, Chen and Guo, Chuan and Cao, Junli and Ren, Jian and Tulyakov, Sergey},
  title     = {MaskControl: Spatio-Temporal Control for Masked Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {9955-9965}
}
Evidential Knowledge Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Liangyu and Gao, Junyu and Xu, Changsheng},
  title     = {Evidential Knowledge Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2814-2824}
}
Straighten Viscous Rectified Flow via Noise Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Jimin and Yan, Jiexi and Yang, Jian and Luo, Lei},
  title     = {Straighten Viscous Rectified Flow via Noise Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {15005-15014}
}
A Framework for Double-Blind Federated Adaptation of Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tastan_2025_ICCV,
  author    = {Tastan, Nurbek and Nandakumar, Karthik},
  title     = {A Framework for Double-Blind Federated Adaptation of Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {923-933}
}
Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Wang and Gao, Wei},
  title     = {Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {26055-26064}
}
B-VLLM: A Vision Large Language Model with Balanced Spatio-Temporal Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Zhuqiang and Yin, Zhenfei and He, Mengwei and Wang, Zhihui and Liu, Zicheng and Wang, Zhiyong and Hu, Kun},
  title     = {B-VLLM: A Vision Large Language Model with Balanced Spatio-Temporal Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {24549-24558}
}
ImHead: A Large-scale Implicit Morphable Model for Localized Head Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Potamias_2025_ICCV,
  author    = {Potamias, Rolandos Alexandros and Galanakis, Stathis and Deng, Jiankang and Papaioannou, Athanasios and Zafeiriou, Stefanos},
  title     = {ImHead: A Large-scale Implicit Morphable Model for Localized Head Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10196-10206}
}
SL2A-INR: Single-Layer Learnable Activation for Implicit Neural Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Rezaeian_2025_ICCV,
  author    = {Rezaeian, Reza and Heidari, Moein and Azad, Reza and Merhof, Dorit and Soltanian-Zadeh, Hamid and Hacihaliloglu, Ilker},
  title     = {SL2A-INR: Single-Layer Learnable Activation for Implicit Neural Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {26065-26074}
}
Authentic 4D Driving Simulation with a Video Generation Model-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Lening and Zheng, Wenzhao and Du, Dalong and Zhang, Yunpeng and Ren, Yilong and Jiang, Han and Cui, Zhiyong and Yu, Haiyang and Zhou, Jie and Zhang, Shanghang},
  title     = {Authentic 4D Driving Simulation with a Video Generation Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {28892-28902}
}
Partial Forward Blocking: A Novel Data Pruning Paradigm for Lossless Training Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Dongyue and Guo, Zilin and Zuo, Jialong and Sang, Nong and Gao, Changxin},
  title     = {Partial Forward Blocking: A Novel Data Pruning Paradigm for Lossless Training Acceleration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {319-328}
}
FedPall: Prototype-based Adversarial and Collaborative Learning for Federated Learning with Feature Drift-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yong and Liang, Feng and Yuan, Guanghu and Yang, Min and Li, Chengming and Hu, Xiping},
  title     = {FedPall: Prototype-based Adversarial and Collaborative Learning for Federated Learning with Feature Drift},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {3111-3120}
}
From Easy to Hard: Progressive Active Learning Framework for Infrared Small Target Detection with Single Point Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Chuang and Zhao, Jinmiao and Liu, Yunpeng and Zhao, Sicheng and Dai, Yimian and Yue, Xiangyu},
  title     = {From Easy to Hard: Progressive Active Learning Framework for Infrared Small Target Detection with Single Point Supervision},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2588-2598}
}
Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruofei and Duan, Peiqi and Shi, Boxin and Wan, Renjie},
  title     = {Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10141-10150}
}
A Constrained Optimization Approach for Gaussian Splatting from Coarsely-posed Images and Noisy Lidar Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Jizong and Tse, Tze Ho Elden and Xu, Kai and Gao, Wenchao and Yao, Angela},
  title     = {A Constrained Optimization Approach for Gaussian Splatting from Coarsely-posed Images and Noisy Lidar Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2961-2970}
}
BabyVLM: Data-Efficient Pretraining of VLMs Inspired by Infant Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shengao and Chandra, Arjun and Liu, Aoming and Saligrama, Venkatesh and Gong, Boqing},
  title     = {BabyVLM: Data-Efficient Pretraining of VLMs Inspired by Infant Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {1380-1390}
}
Efficient Concertormer for Image Deblurring and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuo_2025_ICCV,
  author    = {Kuo, Pin-Hung and Pan, Jinshan and Chien, Shao-Yi and Yang, Ming-Hsuan},
  title     = {Efficient Concertormer for Image Deblurring and Beyond},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {14665-14675}
}
Curve-Aware Gaussian Splatting for 3D Parametric Curve Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhirui and Yi, Renjiao and Dai, Yaqiao and Zhu, Xuening and Chen, Wei and Zhu, Chenyang and Xu, Kai},
  title     = {Curve-Aware Gaussian Splatting for 3D Parametric Curve Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {27531-27541}
}
Sim-DETR: Unlock DETR for Temporal Sentence Grounding-
[pdf]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiajin and Wei, Zhengxuan and Zhu, Yuchen and Shi, Cheng and Li, Guanbin and Lin, Liang and Yang, Sibei},
  title     = {Sim-DETR: Unlock DETR for Temporal Sentence Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {22760-22771}
}
DynamicID: Zero-Shot Multi-ID Image Personalization with Flexible Facial Editability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xirui and Wang, Jiahao and Chen, Hao and Zhang, Weizhan and Wang, Benqi and Li, Yikun and Nan, Haishun},
  title     = {DynamicID: Zero-Shot Multi-ID Image Personalization with Flexible Facial Editability},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10549-10559}
}
Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hao and Yuan, Shuaihang and Bethala, Geeta Chandra Raju and Wen, Congcong and Tzes, Anthony and Fang, Yi},
  title     = {Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {12349-12359}
}
METEOR: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuchen and Wang, Yaoming and Shi, Bowen and Zhang, Xiaopeng and Dai, Wenrui and Li, Chenglin and Xiong, Hongkai and Tian, Qi},
  title     = {METEOR: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {21492-21504}
}
Contact-Aware Amodal Completion for Human-Object Interaction via Multi-Regional Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chi_2025_ICCV,
  author    = {Chi, Seunggeun and Sachdeva, Enna and Huang, Pin-Hao and Lee, Kwonjoon},
  title     = {Contact-Aware Amodal Completion for Human-Object Interaction via Multi-Regional Inpainting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {9487-9496}
}
SeqGrowGraph: Learning Lane Topology as a Chain of Graph Expansions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Mengwei and Zeng, Shuang and Chang, Xinyuan and Liu, Xinran and Pan, Zheng and Xu, Mu and Wei, Xing},
  title     = {SeqGrowGraph: Learning Lane Topology as a Chain of Graph Expansions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {27166-27175}
}
Ultra-Precision 6DoF Pose Estimation Using 2-D Interpolated Discrete Fourier Transform-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Guowei and Mao, Zian and Huang, Peisen},
  title     = {Ultra-Precision 6DoF Pose Estimation Using 2-D Interpolated Discrete Fourier Transform},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {5802-5810}
}
Neural Compression for 3D Geometry Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Siyu and Hou, Junhui and Lin, Weiyao and Wang, Wenping},
  title     = {Neural Compression for 3D Geometry Sets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {25294-25304}
}
Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Luan_2025_ICCV,
  author    = {Luan, Wenting and Lu, Siqi and Zheng, Yongbin and Xu, Wanying and Nie, Lang and Zhou, Zongtan and Liao, Kang},
  title     = {Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {25529-25538}
}
ASGS: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Yuxuan and Tang, Luyao and Chen, Yixin and Chen, Chaoqi and Huang, Yue and Ding, Xinghao},
  title     = {ASGS: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {20911-20921}
}
Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiaolong and Zhang, Lei and Li, Jiayi and Wang, Lituan and Guan, Yifan and Yan, Yu and Zhang, Leyi and Song, Hao},
  title     = {Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10775-10785}
}
UniConvNet: Expanding Effective Receptive Field while Maintaining Asymptotically Gaussian Distribution for ConvNets of Any Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuhao and Xi, Wei},
  title     = {UniConvNet: Expanding Effective Receptive Field while Maintaining Asymptotically Gaussian Distribution for ConvNets of Any Scale},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {20922-20933}
}
Denoising Token Prediction in Masked Autoregressive Models-
[pdf]
[bibtex]@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Ting and Li, Yehao and Pan, Yingwei and Qiu, Zhaofan and Mei, Tao},
  title     = {Denoising Token Prediction in Masked Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {18024-18033}
}
Uncertainty-Aware Gradient Stabilization for Small Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Huixin and Li, Yanjing and Yang, Linlin and Cao, Xianbin and Zhang, Baochang},
  title     = {Uncertainty-Aware Gradient Stabilization for Small Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {8407-8417}
}
Radiant Foam: Real-Time Differentiable Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Govindarajan_2025_ICCV,
  author    = {Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea},
  title     = {Radiant Foam: Real-Time Differentiable Ray Tracing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4135-4145}
}
Attention to the Burstiness in Visual Prompt Tuning!-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuzhu and Duan, Manni and Kong, Shu},
  title     = {Attention to the Burstiness in Visual Prompt Tuning!},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4253-4263}
}
BadVideo: Stealthy Backdoor Attack against Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruotong and Zhu, Mingli and Ou, Jiarong and Chen, Rui and Tao, Xin and Wan, Pengfei and Wu, Baoyuan},
  title     = {BadVideo: Stealthy Backdoor Attack against Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {19075-19084}
}
Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression-
[pdf]
[bibtex]@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Shiyu and Wang, Jinpeng and Zhou, Yimin and Chen, Bin and Luo, Tianci and An, Baoyi and Dai, Tao and Xia, Shu-Tao and Wang, Yaowei},
  title     = {Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {15727-15736}
}
AIGI-Holmes: Towards Explainable and Generalizable AI-Generated Image Detection via Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Ziyin and Luo, Yunpeng and Wu, Yuanchen and Sun, Ke and Ji, Jiayi and Yan, Ke and Ding, Shouhong and Sun, Xiaoshuai and Wu, Yunsheng and Ji, Rongrong},
  title     = {AIGI-Holmes: Towards Explainable and Generalizable AI-Generated Image Detection via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {18746-18758}
}
TokensGen: Harnessing Condensed Tokens for Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Wenqi and Xiao, Zeqi and Yang, Danni and Zhou, Yifan and Yang, Shuai and Yang, Lei and Si, Jianlou and Pan, Xingang},
  title     = {TokensGen: Harnessing Condensed Tokens for Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {18197-18206}
}
Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Xin and Wang, Xinyu and Shen, Lei and Zhang, Kaihao and Yu, Xin},
  title     = {Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {20647-20657}
}
R1-Onevision: Advancing Generalized Multimodal Reasoning through Cross-Modal Formalization-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yi and He, Xiaoxuan and Pan, Hongkun and Jiang, Xiyan and Deng, Yan and Yang, Xingtao and Lu, Haoyu and Yin, Dacheng and Rao, Fengyun and Zhu, Minfeng and Zhang, Bo and Chen, Wei},
  title     = {R1-Onevision: Advancing Generalized Multimodal Reasoning through Cross-Modal Formalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2376-2385}
}
HIS-GPT: Towards 3D Human-In-Scene Multimodal Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Jiahe and Hou, Ruibing and Tian, Zejie and Chang, Hong and Shan, Shiguang},
  title     = {HIS-GPT: Towards 3D Human-In-Scene Multimodal Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4317-4327}
}
Learnable Logit Adjustment for Imbalanced Semi-Supervised Learning under Class Distribution Mismatch-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyuck and Park, Taemin and Kim, Heeyoung},
  title     = {Learnable Logit Adjustment for Imbalanced Semi-Supervised Learning under Class Distribution Mismatch},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2664-2674}
}
Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Haoyang and Sun, Dongfang and Ma, Caoyuan and Wang, Shiqin and Zhang, Kewei and Wang, Zheng and Wang, Zhixiang},
  title     = {Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {17838-17847}
}
Learning Robust Image Watermarking with Lossless Cover Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiale and Wang, Wei and Shi, Chongyang and Dong, Li and Hu, Xiping},
  title     = {Learning Robust Image Watermarking with Lossless Cover Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {15056-15065}
}
Enhancing Transformers Through Conditioned Embedded Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saratchandran_2025_ICCV,
  author    = {Saratchandran, Hemanth and Lucey, Simon},
  title     = {Enhancing Transformers Through Conditioned Embedded Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4786-4795}
}
Drawing Developmental Trajectory from Cortical Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Wenxuan and Qu, Ruowen and Liu, Zhongliang and Dai, Zhuoyan and Shi, Dongzi and Yu, Sijin and Xiong, Tong and Liu, Shiping and Xu, Xiangmin and Xing, Xiaofen and Zhang, Xin},
  title     = {Drawing Developmental Trajectory from Cortical Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {11026-11035}
}
CryoFastAR: Fast Cryo-EM Ab initio Reconstruction Made Easy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiakai and Zhou, Shouchen and Dai, Haizhao and Liu, Xinhang and Wang, Peihao and Fan, Zhiwen and Pei, Yuan and Yu, Jingyi},
  title     = {CryoFastAR: Fast Cryo-EM Ab initio Reconstruction Made Easy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {8462-8471}
}
3DGS-LM: Faster Gaussian-Splatting Optimization with Levenberg-Marquardt-
[pdf]
[supp]
% Accented letters are wrapped as BibTeX special characters ({\"o}, {\v{z}}, {\v{c}}) so sorting and label generation treat each as a single letter; rendered output is unchanged.
[bibtex]@InProceedings{Hollein_2025_ICCV,
  author    = {H{\"o}llein, Lukas and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Zollh{\"o}fer, Michael and Nie{\ss}ner, Matthias},
  title     = {3DGS-LM: Faster Gaussian-Splatting Optimization with Levenberg-Marquardt},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {26740-26750}
}
GauUpdate: New Object Insertion in 3D Gaussian Fields with Consistent Global Illumination-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Chengwei and Zhang, Fan and Xu, Liangchao and Pan, Liang and Liu, Ziwei and Wang, Wenping and Zhang, Xiao-Ping and Liu, Yuan},
  title     = {GauUpdate: New Object Insertion in 3D Gaussian Fields with Consistent Global Illumination},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {28653-28663}
}
OphCLIP: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Ming and Yuan, Kun and Shen, Yaling and Tang, Feilong and Xu, Xiaohao and Zhou, Lin and Li, Wei and Chen, Ying and Xu, Zhongxing and Peng, Zelin and Yan, Siyuan and Srivastav, Vinkle and Song, Diping and Li, Tianbin and Shi, Danli and Ye, Jin and Padoy, Nicolas and Navab, Nassir and He, Junjun and Ge, Zongyuan},
  title     = {OphCLIP: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {19838-19849}
}
Hints of Prompt: Enhancing Visual Representation for Multimodal LLMs in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hao and Gao, Zhanning and Chen, Zhili and Ye, Maosheng and Chen, Qifeng and Cao, Tongyi and Qi, Honggang},
  title     = {Hints of Prompt: Enhancing Visual Representation for Multimodal LLMs in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {6165-6175}
}
IM360: Large-scale Indoor Mapping with 360 Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Manocha, Dinesh},
  title     = {IM360: Large-scale Indoor Mapping with 360 Cameras},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {29040-29050}
}
RegGS: Unposed Sparse Views Gaussian Splatting with 3DGS Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Chong and Hu, Yu and Yu, Sicheng and Zhao, Beizhen and Wang, Zijian and Wang, Hao},
  title     = {RegGS: Unposed Sparse Views Gaussian Splatting with 3DGS Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {8100-8109}
}
SU-RGS: Relightable 3D Gaussian Splatting from Sparse Views under Unconstrained Illuminations-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qi and Huang, Chi and Zhang, Qian and Li, Nan and Feng, Wei},
  title     = {SU-RGS: Relightable 3D Gaussian Splatting from Sparse Views under Unconstrained Illuminations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {26859-26868}
}
Representation Shift: Unifying Token Compression with FlashAttention-
[pdf]
[arXiv]
[bibtex]@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Ko, Byungoh and Kim, Eunseo and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {Representation Shift: Unifying Token Compression with FlashAttention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {20456-20466}
}
DGTalker: Disentangled Generative Latent Space Learning for Audio-Driven Gaussian Talking Heads-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiaoxi and Fan, Yanbo and Yang, Qiya and Wang, Xuan and Gao, Wei and Li, Ge},
  title     = {DGTalker: Disentangled Generative Latent Space Learning for Audio-Driven Gaussian Talking Heads},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {11079-11088}
}
RALoc: Enhancing Outdoor LiDAR Localization via Rotation Awareness-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuyang and Li, Wen and Ao, Sheng and Xu, Qingshan and Yu, Shangshu and Guo, Yu and Zhou, Yin and Shen, Siqi and Wang, Cheng},
  title     = {RALoc: Enhancing Outdoor LiDAR Localization via Rotation Awareness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {3304-3313}
}
EDiT: Efficient Diffusion Transformers with Linear Compressed Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Becker_2025_ICCV,
  author    = {Becker, Philipp and Mehrotra, Abhinav and Chavhan, Ruchika and Chadwick, Malcolm and Morreale, Luca and Noroozi, Mehdi and Gil C. P. Ramos, Alberto and Bhattacharya, Sourav},
  title     = {EDiT: Efficient Diffusion Transformers with Linear Compressed Attention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {19608-19616}
}
GenHancer: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Shijie and Ge, Yuying and Wang, Teng and Guo, Yuxin and Ge, Yixiao and Shan, Ying},
  title     = {GenHancer: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {24402-24412}
}
LLaVA-3D: A Simple yet Effective Pathway to Empowering LMMs with 3D Capabilities-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Chenming and Wang, Tai and Zhang, Wenwei and Pang, Jiangmiao and Liu, Xihui},
  title     = {LLaVA-3D: A Simple yet Effective Pathway to Empowering LMMs with 3D Capabilities},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4295-4305}
}
Understanding Museum Exhibits using Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Balauca_2025_ICCV,
  author    = {Balauca, Ada-Astrid and Garai, Sanjana and Balauca, Stefan and Shetty, Rasesh Udayakumar and Agrawal, Naitik and Shah, Dhwanil Subhashbhai and Fu, Yuqian and Wang, Xi and Toutanova, Kristina and Paudel, Danda Pani and Van Gool, Luc},
  title     = {Understanding Museum Exhibits using Vision-Language Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {2227-2238}
}
MINERVA: Evaluating Complex Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagrani_2025_ICCV,
  author    = {Nagrani, Arsha and Menon, Sachit and Iscen, Ahmet and Buch, Shyamal and Mehran, Ramin and Jha, Nilpa and Hauth, Anja and Zhu, Yukun and Vondrick, Carl and Sirotenko, Mikhail and Schmid, Cordelia and Weyand, Tobias},
  title     = {MINERVA: Evaluating Complex Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {23968-23978}
}
Active Membership Inference Test (aMINT): Enhancing Model Auditability with Multi-Task Learning.-
[pdf]
[arXiv]
% The stray trailing period was dropped from the title value; bibliography styles add their own terminal punctuation.
[bibtex]@InProceedings{DeAlcala_2025_ICCV,
  author    = {DeAlcala, Daniel and Morales, Aythami and Fierrez, Julian and Mancera, Gonzalo and Tolosana, Ruben and Ortega-Garcia, Javier},
  title     = {Active Membership Inference Test (aMINT): Enhancing Model Auditability with Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {647-656}
}
One Perturbation is Enough: On Generating Universal Adversarial Perturbations against Vision-Language Pre-training Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Hao and Kong, Jiawei and Yu, Wenbo and Chen, Bin and Li, Jiawei and Wu, Hao and Xia, Shu-Tao and Xu, Ke},
  title     = {One Perturbation is Enough: On Generating Universal Adversarial Perturbations against Vision-Language Pre-training Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {4090-4100}
}
When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Hanqing and Ruan, Shouwei and Huang, Yao and Zhao, Shiji and Wei, Xingxing},
  title     = {When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = {October},
  year      = {2025},
  pages     = {10485-10495}
}
Exploring View Consistency for Scene-Adaptive Low-Light Light Field Image Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shuo and Gao, Chen and Lin, Youfang}, title = {Exploring View Consistency for Scene-Adaptive Low-Light Light Field Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7340-7349} }
DIA: The Adversarial Exposure of Deterministic Inversion in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Seunghoo and Son, Geonho and Lee, Juhun and Woo, Simon S.}, title = {DIA: The Adversarial Exposure of Deterministic Inversion in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17994-18003} }
Rectifying Magnitude Neglect in Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Qihang and Huang, Huaibo and Ai, Yuang and He, Ran}, title = {Rectifying Magnitude Neglect in Linear Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21505-21514} }
GEMeX: A Large-Scale, Groundable, and Explainable Medical VQA Benchmark for Chest X-ray Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Bo and Zou, Ke and Zhan, Li-Ming and Lu, Zexin and Dong, Xiaoyu and Chen, Yidi and Xie, Chengqiang and Cao, Jiannong and Wu, Xiao-Ming and Fu, Huazhu}, title = {GEMeX: A Large-Scale, Groundable, and Explainable Medical VQA Benchmark for Chest X-ray Diagnosis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21310-21320} }
Learning to See Inside Opaque Liquid Containers using Speckle Vibrometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kichler_2025_ICCV, author = {Kichler, Matan and Bagon, Shai and Sheinin, Mark}, title = {Learning to See Inside Opaque Liquid Containers using Speckle Vibrometry}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9466-9476} }
HouseTour: A Virtual Real Estate A(I)gent-
[pdf]
[supp]
[bibtex]@InProceedings{Celen_2025_ICCV, author = {\c{C}elen, Ata and Pollefeys, Marc and Barath, Daniel and Armeni, Iro}, title = {HouseTour: A Virtual Real Estate A(I)gent}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17761-17771} }
Debiased Teacher for Day-to-Night Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_ICCV, author = {Cui, Yiming and Li, Liang and Yin, Haibing and Gao, Yuhan and Sun, Yaoqi and Yan, Chenggang}, title = {Debiased Teacher for Day-to-Night Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2577-2587} }
Frequency-Aligned Knowledge Distillation for Lightweight Spatiotemporal Forecasting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuqi and Yang, Chuanguang and Zeng, Hansheng and Dong, Zeyu and An, Zhulin and Xu, Yongjun and Tian, Yingli and Wu, Hao}, title = {Frequency-Aligned Knowledge Distillation for Lightweight Spatiotemporal Forecasting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7262-7272} }
Colors See Colors Ignore: Clothes Changing ReID with Color Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathak_2025_ICCV, author = {Pathak, Priyank and Rawat, Yogesh S.}, title = {Colors See Colors Ignore: Clothes Changing ReID with Color Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16797-16807} }
Image as an IMU: Estimating Camera Motion from a Single Motion-Blurred Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jerred and Clark, Ronald}, title = {Image as an IMU: Estimating Camera Motion from a Single Motion-Blurred Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {90-99} }
AnyI2V: Animating Any Conditional Image with Motion Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ziye and Luo, Hao and Shuai, Xincheng and Ding, Henghui}, title = {AnyI2V: Animating Any Conditional Image with Motion Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17302-17311} }
GENMO: A GENeralist Model for Human MOtion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiefeng and Cao, Jinkun and Zhang, Haotian and Rempe, Davis and Kautz, Jan and Iqbal, Umar and Yuan, Ye}, title = {GENMO: A GENeralist Model for Human MOtion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11766-11776} }
M-Net: MRI Brain Tumor Sequential Segmentation Network via Mesh-Cast-
[pdf]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping}, title = {M-Net: MRI Brain Tumor Sequential Segmentation Network via Mesh-Cast}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20116-20125} }
Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yafei and Kong, Lingqi and Li, Huafeng and Wen, Jie}, title = {Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12659-12669} }
Prompt-A-Video: Prompt Your Video Diffusion Model via Preference-Aligned LLM-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Yatai and Zhang, Jiacheng and Wu, Jie and Zhang, Shilong and Chen, Shoufa and Ge, Chongjian and Sun, Peize and Chen, Weifeng and Shao, Wenqi and Xiao, Xuefeng and Huang, Weilin and Luo, Ping}, title = {Prompt-A-Video: Prompt Your Video Diffusion Model via Preference-Aligned LLM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18725-18735} }
Diffusion Image Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chihaoui_2025_ICCV, author = {Chihaoui, Hamadi and Favaro, Paolo}, title = {Diffusion Image Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24636-24644} }
Constructing Ophthalmic MLLM for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xinyao and Song, Diping}, title = {Constructing Ophthalmic MLLM for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21547-21556} }
MemDistill: Distilling LiDAR Knowledge into Memory for Camera-Only 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Kwon_2025_ICCV, author = {Kwon, Donghyeon and Yoon, Youngseok and Son, Hyeongseok and Kwak, Suha}, title = {MemDistill: Distilling LiDAR Knowledge into Memory for Camera-Only 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6828-6838} }
OpenRSD: Towards Open-prompts for Object Detection in Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Ziyue and Feng, Yongchao and Liu, Ziqi and Yang, Shuai and Liu, Qingjie and Wang, Yunhong}, title = {OpenRSD: Towards Open-prompts for Object Detection in Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8384-8394} }
Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization-
[pdf]
[supp]
[bibtex]@InProceedings{Carr_2025_ICCV, author = {Carr, Thomas and Xu, Depeng and Yuan, Shuhan and Lu, Aidong}, title = {Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13162-13170} }
FlashDepth: Real-time Streaming Video Depth Estimation at 2K Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chou_2025_ICCV, author = {Chou, Gene and Xian, Wenqi and Yang, Guandao and Abdelfattah, Mohamed and Hariharan, Bharath and Snavely, Noah and Yu, Ning and Debevec, Paul}, title = {FlashDepth: Real-time Streaming Video Depth Estimation at 2K Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9638-9648} }
Towards Performance Consistency in Multi-Level Model Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Qi and Yu, Runpeng and Wang, Xinchao}, title = {Towards Performance Consistency in Multi-Level Model Collaboration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2567-2576} }
Polarimetric Neural Field via Unified Complex-Valued Wave Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Chu and Yang, Yixin and Liao, Junda and Guo, Heng and Shi, Boxin and Sato, Imari}, title = {Polarimetric Neural Field via Unified Complex-Valued Wave Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25660-25669} }
CLIP-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ge_2025_ICCV, author = {Ge, Jiannan and Xie, Lingxi and Xie, Hongtao and Li, Pandeng and Liu, Sun-Ao and Zhang, Xiaopeng and Tian, Qi and Zhang, Yongdong}, title = {CLIP-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24034-24044} }
Where, What, Why: Towards Explainable Driver Attention Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Yuchen and Tang, Jiayu and Xiao, Xiaoyan and Lin, Yueyao and Liu, Linkai and Guo, Zipeng and Fei, Hao and Xia, Xiaobo and Gou, Chao}, title = {Where, What, Why: Towards Explainable Driver Attention Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2675-2685} }
MeshAnything V2: Artist-Created Mesh Generation with Adjacent Mesh Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yiwen and Wang, Yikai and Luo, Yihao and Wang, Zhengyi and Chen, Zilong and Zhu, Jun and Zhang, Chi and Lin, Guosheng}, title = {MeshAnything V2: Artist-Created Mesh Generation with Adjacent Mesh Tokenization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13922-13931} }
Visual Intention Grounding for Egocentric Assistants-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Pengzhan and Xiao, Junbin and Tse, Tze Ho Elden and Li, Yicong and Akula, Arjun and Yao, Angela}, title = {Visual Intention Grounding for Egocentric Assistants}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2512-2522} }
Analyzing Finetuning Representation Shift for Multimodal LLMs Steering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khayatan_2025_ICCV, author = {Khayatan, Pegah and Shukor, Mustafa and Parekh, Jayneel and Dapogny, Arnaud and Cord, Matthieu}, title = {Analyzing Finetuning Representation Shift for Multimodal LLMs Steering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2206-2216} }
TeethGenerator: A two-stage framework for paired pre- and post-orthodontic 3D dental data generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Changsong and Liang, Yaqian and Wang, Shaofeng and Dai, Jiajia and Liu, Yong-Jin}, title = {TeethGenerator: A two-stage framework for paired pre- and post-orthodontic 3D dental data generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25872-25881} }
Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shiwei and Zhou, Qi and Ke, Wei}, title = {Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21097-21106} }
Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2025_ICCV, author = {Ko, Dohwan and Lee, Ji Soo and Choi, Minhyuk and Meng, Zihang and Kim, Hyunwoo J.}, title = {Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22263-22273} }
TinyViM: Frequency Decoupling for Tiny Hybrid Vision Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Xiaowen and Ni, Zhenliang and Chen, Xinghao}, title = {TinyViM: Frequency Decoupling for Tiny Hybrid Vision Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23519-23529} }
Harnessing Massive Satellite Imagery with Efficient Masked Image Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Fengxiang and Wang, Hongzhen and Wang, Di and Guo, Zonghao and Zhong, Zhenyu and Lan, Long and Yang, Wenjing and Zhang, Jing}, title = {Harnessing Massive Satellite Imagery with Efficient Masked Image Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6935-6947} }
Visual-Oriented Fine-Grained Knowledge Editing for MultiModal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Zhen and Gu, Leijiang and Yang, Xun and Duan, Zhangling and Shi, Zenglin and Wang, Meng}, title = {Visual-Oriented Fine-Grained Knowledge Editing for MultiModal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2491-2500} }
Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Hai and Xia, Yan and Zhou, Sashuai and Wang, Hanting and Wang, Shulei and Zhao, Zhou}, title = {Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22488-22498} }
A Recipe for Generating 3D Worlds from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Schwarz_2025_ICCV, author = {Schwarz, Katja and Rozumny, Denis and Bul\`o, Samuel Rota and Porzi, Lorenzo and Kontschieder, Peter}, title = {A Recipe for Generating 3D Worlds from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3520-3530} }
MVGBench: a Comprehensive Benchmark for Multi-view Generation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Xianghui and Lessen, Jan Eric and Pons-Moll, Gerard}, title = {MVGBench: a Comprehensive Benchmark for Multi-view Generation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8207-8218} }
Fewer Denoising Steps or Cheaper Per-Step Inference: Towards Compute-Optimal Diffusion Model Deployment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Zhenbang and Fu, Yonggan and Wang, Lifu and Qian, Jiayi and Luo, Xiao and Lin, Yingyan Celine}, title = {Fewer Denoising Steps or Cheaper Per-Step Inference: Towards Compute-Optimal Diffusion Model Deployment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3001-3010} }
Competitive Distillation: A Simple Learning Strategy for Improving Visual Classification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Daqian and Diao, Xiaolei and Chen, Xu and John, C\'edric M}, title = {Competitive Distillation: A Simple Learning Strategy for Improving Visual Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2981-2990} }
DynFaceRestore: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Do_2025_ICCV, author = {Do, Huu-Phu and Chen, Yu-Wei and Liao, Yi-Cheng and Hsiao, Chi-Wei and Wang, Han-Yang and Chiu, Wei-Chen and Huang, Ching-Chun}, title = {DynFaceRestore: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10432-10441} }
LoRAverse: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sonmezer_2025_ICCV, author = {Sonmezer, Mert and Zheng, Matthew and Yanardag, Pinar}, title = {LoRAverse: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17879-17888} }
AllTracker: Efficient Dense Point Tracking at High Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Harley_2025_ICCV, author = {Harley, Adam W. and You, Yang and Sun, Xinglong and Zheng, Yang and Raghuraman, Nikhil and Gu, Yunqi and Liang, Sheldon and Chu, Wen-Hsuan and Dave, Achal and You, Suya and Ambrus, Rares and Fragkiadaki, Katerina and Guibas, Leonidas}, title = {AllTracker: Efficient Dense Point Tracking at High Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5253-5262} }
Gaussian-based World Model: Gaussian Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Tuo and Wang, Wenguan and Yang, Yi}, title = {Gaussian-based World Model: Gaussian Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25239-25249} }
CombatVLA: An Efficient Vision-Language-Action Model for Combat Tasks in 3D Action Role-Playing Games-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Peng and Bu, Pi and Wang, Yingyao and Wang, Xinyi and Wang, Ziming and Guo, Jie and Zhao, Yingxiu and Zhu, Qi and Song, Jun and Yang, Siran and Wang, Jiamang and Zheng, Bo}, title = {CombatVLA: An Efficient Vision-Language-Action Model for Combat Tasks in 3D Action Role-Playing Games}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10919-10928} }
SketchSplat: 3D Edge Reconstruction via Differentiable Multi-view Sketch Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Haiyang and Zwicker, Matthias}, title = {SketchSplat: 3D Edge Reconstruction via Differentiable Multi-view Sketch Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25649-25659} }
Social Debiasing for Fair Multi-modal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Harry and Guo, Yangyang and Guo, Qingpei and Yang, Ming and Gan, Tian and Guan, Weili and Nie, Liqiang}, title = {Social Debiasing for Fair Multi-modal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1740-1750} }
DOGR: Towards Versatile Visual Document Grounding and Referring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Yinan and Chen, Yuxin and Lin, Haokun and Wu, Yichen and Yang, Shuyu and Qi, Zhongang and Ma, Chen and Zhu, Li}, title = {DOGR: Towards Versatile Visual Document Grounding and Referring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3596-3606} }
Benchmarking and Learning Multi-Dimensional Quality Evaluator for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yujie and Cui, Bingyang and Yang, Qi and Li, Zhu and Xu, Yiling}, title = {Benchmarking and Learning Multi-Dimensional Quality Evaluator for Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18563-18574} }
Sat2City: 3D City Generation from A Single Satellite Image with Cascaded Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2025_ICCV, author = {Hua, Tongyan and Jiang, Lutao and Chen, Ying-Cong and Zhao, Wufan}, title = {Sat2City: 3D City Generation from A Single Satellite Image with Cascaded Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27978-27988} }
Hi3DGen: High-fidelity 3D Geometry Generation from Images via Normal Bridging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV, author = {Ye, Chongjie and Wu, Yushuang and Lu, Ziteng and Chang, Jiahao and Guo, Xiaoyang and Zhou, Jiaqing and Zhao, Hao and Han, Xiaoguang}, title = {Hi3DGen: High-fidelity 3D Geometry Generation from Images via Normal Bridging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25050-25061} }
SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Lin and Zeng, Xianfang and Li, Kangcong and Yu, Gang and Chen, Tao}, title = {SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23145-23155} }
DreamDance: Animating Human Images by Enriching 3D Geometry Cues from 2D Poses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_ICCV, author = {Pang, Yatian and Zhu, Bin and Lin, Bin and Zheng, Mingzhe and Tay, Francis E. H. and Lim, Ser-Nam and Yang, Harry and Yuan, Li}, title = {DreamDance: Animating Human Images by Enriching 3D Geometry Cues from 2D Poses}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14039-14050} }
MBTI: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Huh_2025_ICCV, author = {Huh, Jungwoo and Park, Yeseung and Kim, Seongjean and Kim, Jungsu and Lee, Sanghoon}, title = {MBTI: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11568-11578} }
MixRI: Mixing Features of Reference Images for Novel Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xinhang and Shi, Jiawei and Dang, Zheng and Dai, Yuchao}, title = {MixRI: Mixing Features of Reference Images for Novel Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9024-9035} }
p-AVAS: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Susan and Huang, Chao and Tang, Yunlong and Zhang, Zeliang and Xu, Chenliang}, title = {p-AVAS: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13942-13951} }
Learning Precise Affordances from Egocentric Videos for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Gen and Tsagkas, Nikolaos and Song, Jifei and Mon-Williams, Ruaridh and Vijayakumar, Sethu and Shao, Kun and Sevilla-Lara, Laura}, title = {Learning Precise Affordances from Egocentric Videos for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10581-10591} }
VAFlow: Video-to-Audio Generation with Cross-Modality Flow Matching-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xihua and Cheng, Xin and Wang, Yuyue and Song, Ruihua and Wang, Yunfeng}, title = {VAFlow: Video-to-Audio Generation with Cross-Modality Flow Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11777-11786} }
HDR Image Generation via Gain Map Decomposed Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Yuanshen and Xu, Ruikang and Liao, Yinuo and Yao, Mingde and Wang, Lizhi and Xiong, Zhiwei}, title = {HDR Image Generation via Gain Map Decomposed Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17536-17545} }
DiffuMatch: Category-Agnostic Spectral Diffusion Priors for Robust Non-rigid Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pierson_2025_ICCV, author = {Pierson, Emery and Li, Lei and Dai, Angela and Ovsjanikov, Maks}, title = {DiffuMatch: Category-Agnostic Spectral Diffusion Priors for Robust Non-rigid Shape Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5745-5756} }
MoGA: 3D Generative Avatar Prior for Monocular Gaussian Avatar Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Zijian and Duan, Longteng and Song, Jie and Black, Michael J. and Geiger, Andreas}, title = {MoGA: 3D Generative Avatar Prior for Monocular Gaussian Avatar Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13304-13314} }
Aligning Moments in Time using Video Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2025_ICCV, author = {Kumar, Yogesh and Agarwal, Uday and Gupta, Manish and Mishra, Anand}, title = {Aligning Moments in Time using Video Queries}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20215-20225} }
StreamGS: Online Generalizable Gaussian Splatting Reconstruction for Unposed Image Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yang and Wang, Jinglu and Chu, Lei and Li, Xiao and Kao, Shiu-Hong and Chen, Ying-Cong and Lu, Yan}, title = {StreamGS: Online Generalizable Gaussian Splatting Reconstruction for Unposed Image Streams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25841-25850} }
Adversarial Attention Perturbations for Large Object Detection Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yahn_2025_ICCV, author = {Yahn, Zachary and Tekin, Selim Furkan and Ilhan, Fatih and Hu, Sihao and Huang, Tiansheng and Xu, Yichang and Loper, Margaret and Liu, Ling}, title = {Adversarial Attention Perturbations for Large Object Detection Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3184-3193} }
Meta-Learning Dynamic Center Distance: Hard Sample Mining for Learning with Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2025_ICCV, author = {Mu, Chenyu and Qu, Yijun and Yan, Jiexi and Yang, Erkun and Deng, Cheng}, title = {Meta-Learning Dynamic Center Distance: Hard Sample Mining for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {415-425} }
Ensemble Foreground Management for Unsupervised Object Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Ziling and Moemeni, Armaghan and Caleb-Solly, Praminda}, title = {Ensemble Foreground Management for Unsupervised Object Discovery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20268-20279} }
RESCUE: Crowd Evacuation Simulation via Controlling SDM-United Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xiaolin and Zhou, Tianyi and Kang, Hongbo and Ma, Jian and Wang, Ziwen and Huang, Jing and Weng, Wenguo and Lai, Yu-Kun and Li, Kun}, title = {RESCUE: Crowd Evacuation Simulation via Controlling SDM-United Characters}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24955-24964} }
Joint Diffusion Models in Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Skiers_2025_ICCV, author = {Skier\'s, Pawe{\l} and Deja, Kamil}, title = {Joint Diffusion Models in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4380-4390} }
MissRAG: Addressing the Missing Modality Challenge in Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pipoli_2025_ICCV, author = {Pipoli, Vittorio and Saporita, Alessia and Bolelli, Federico and Cornia, Marcella and Baraldi, Lorenzo and Grana, Costantino and Cucchiara, Rita and Ficarra, Elisa}, title = {MissRAG: Addressing the Missing Modality Challenge in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3215-3224} }
Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2025_ICCV, author = {Zuo, Ronglai and Potamias, Rolandos Alexandros and Ververas, Evangelos and Deng, Jiankang and Zafeiriou, Stefanos}, title = {Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23806-23816} }
Test-Time Prompt Tuning for Zero-Shot Depth Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Chanhwi and Bae, Inhwan and Park, Jin-Hwi and Jeon, Hae-Gon}, title = {Test-Time Prompt Tuning for Zero-Shot Depth Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9443-9454} }
GUIOdyssey: A Comprehensive Dataset for Cross-App GUI Navigation on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Quanfeng and Shao, Wenqi and Liu, Zitao and Du, Lingxiao and Meng, Fanqing and Li, Boxuan and Chen, Botong and Huang, Siyuan and Zhang, Kaipeng and Luo, Ping}, title = {GUIOdyssey: A Comprehensive Dataset for Cross-App GUI Navigation on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22404-22414} }
TOTP: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive Mamba Latent Diffusion-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Ziyang and Wei, Ping and Deng, Shangqi and Tang, Haowen and Li, Jiapeng and Li, Huan}, title = {TOTP: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive Mamba Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26263-26272} }
AstroLoc: Robust Space to Ground Image Localizer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berton_2025_ICCV, author = {Berton, Gabriele and Stoken, Alex and Masone, Carlo}, title = {AstroLoc: Robust Space to Ground Image Localizer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5811-5820} }
Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jiwon and Kim, Pureum and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan}, title = {Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15491-15500} }
Chimera: Improving Generalist Model with Domain-Specific Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Tianshuo and Li, Mingsheng and Yuan, Jiakang and Zhou, Hongbin and Xia, Renqiu and Zhang, Renrui and Bai, Lei and Mao, Song and Wang, Bin and Zhou, Aojun and Shi, Botian and Chen, Tao and Zhang, Bo and Yue, Xiangyu}, title = {Chimera: Improving Generalist Model with Domain-Specific Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3011-3022} }
EgoM2P: Egocentric Multimodal Multitask Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Gen and Chen, Yutong and Wu, Yiqian and Zhao, Kaifeng and Pollefeys, Marc and Tang, Siyu}, title = {EgoM2P: Egocentric Multimodal Multitask Pretraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10830-10843} }
On Large Multimodal Models as Open-World Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Conti_2025_ICCV, author = {Conti, Alessandro and Mancini, Massimiliano and Fini, Enrico and Wang, Yiming and Rota, Paolo and Ricci, Elisa}, title = {On Large Multimodal Models as Open-World Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16388-16398} }
Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Youwei and Xu, Tianyang and Wu, Cong and Wu, Xiaojun and Kittler, Josef}, title = {Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12648-12658} }
Simultaneous Motion And Noise Estimation with Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiba_2025_ICCV, author = {Shiba, Shintaro and Aoki, Yoshimitsu and Gallego, Guillermo}, title = {Simultaneous Motion And Noise Estimation with Event Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6959-6969} }
Supercharging Floorplan Localization with Semantic Rays-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Grader_2025_ICCV, author = {Grader, Yuval and Averbuch-Elor, Hadar}, title = {Supercharging Floorplan Localization with Semantic Rays}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27116-27125} }
MDP3: A Training-free Approach for List-wise Frame Selection in Video-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Hui and Lu, Shiyin and Wang, Huanyu and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Li, Ming}, title = {MDP3: A Training-free Approach for List-wise Frame Selection in Video-LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24090-24101} }
General Compression Framework for Efficient Transformer Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Lingyi and Li, Jinglun and Zhou, Xinyu and Yan, Shilin and Guo, Pinxue and Jiang, Kaixun and Chen, Zhaoyu and Gao, Shuyong and Li, Runze and Sheng, Xingdong and Zhang, Wei and Lu, Hong and Zhang, Wenqiang}, title = {General Compression Framework for Efficient Transformer Object Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13427-13437} }
Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Chiao-An and Peng, Kuan-Chuan and Yeh, Raymond A.}, title = {Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23419-23430} }
CIARD: Cyclic Iterative Adversarial Robustness Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Liming and Pang, Shuchao and Zheng, Xu and Gu, Xiang and Du, Anan and Liu, Yunhuai and Zhou, Yongbin}, title = {CIARD: Cyclic Iterative Adversarial Robustness Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {350-359} }
DiffTell: A High-Quality Dataset for Describing Image Manipulation Changes-
[pdf]
[supp]
[bibtex]@InProceedings{Di_2025_ICCV, author = {Di, Zonglin and Shi, Jing and Fan, Yifei and Tan, Hao and Black, Alexander and Collomosse, John and Liu, Yang}, title = {DiffTell: A High-Quality Dataset for Describing Image Manipulation Changes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24580-24590} }
A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Youliang and Li, Ronghui and Zhang, Yachao and Pan, Liang and Wang, Jingbo and Liu, Yebin and Li, Xiu}, title = {A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13281-13292} }
Local Dense Logit Relations for Enhanced Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Liuchi and Liu, Kang and Liu, Jinshuai and Wang, Lu and Xu, Lisheng and Cheng, Jun}, title = {Local Dense Logit Relations for Enhanced Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4539-4549} }
Less is More: Empowering GUI Agent with Context-Aware Simplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Gongwei and Zhou, Xurui and Shao, Rui and Lyu, Yibo and Zhou, Kaiwen and Wang, Shuai and Li, Wentao and Li, Yinchuan and Qi, Zhongang and Nie, Liqiang}, title = {Less is More: Empowering GUI Agent with Context-Aware Simplification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5901-5911} }
TrafficLoc: Localizing Traffic Surveillance Cameras in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Yan and Lu, Yunxiang and Song, Rui and Dhaouadi, Oussema and Henriques, Jo\~ao F. and Cremers, Daniel}, title = {TrafficLoc: Localizing Traffic Surveillance Cameras in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28685-28695} }
Detect Anything 3D in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hanxue and Jiang, Haoran and Yao, Qingsong and Sun, Yanan and Zhang, Renrui and Zhao, Hao and Li, Hongyang and Zhu, Hongzi and Yang, Zetong}, title = {Detect Anything 3D in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5048-5059} }
Failure Cases Are Better Learned But Boundary Says Sorry: Facilitating Smooth Perception Change for Accuracy-Robustness Trade-Off in Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yanyun and Liu, Li}, title = {Failure Cases Are Better Learned But Boundary Says Sorry: Facilitating Smooth Perception Change for Accuracy-Robustness Trade-Off in Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4691-4700} }
CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection-
[pdf]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Zhixin and Deng, Jiacheng and Li, Xinjun and Yin, Xiaotian and Liao, Bohao and Yin, Baoqun and Yang, Wenfei and Zhang, Tianzhu}, title = {CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27739-27749} }
Exploiting Vision Language Model for Training-Free 3D Point Cloud OOD Detection via Graph Score Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Tiankai and Li, Yushu and Goodge, Adam and Teng, Fei and Yang, Xulei and Li, Tianrui and Xu, Xun}, title = {Exploiting Vision Language Model for Training-Free 3D Point Cloud OOD Detection via Graph Score Propagation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28797-28807} }
Monocular Facial Appearance Capture in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Yingyan and Gadola, Kate and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek}, title = {Monocular Facial Appearance Capture in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12078-12088} }
Gaussian Variation Field Diffusion for High-fidelity Video-to-4D Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Bowen and Xu, Sicheng and Wang, Chuxin and Yang, Jiaolong and Zhao, Feng and Chen, Dong and Guo, Baining}, title = {Gaussian Variation Field Diffusion for High-fidelity Video-to-4D Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12502-12513} }
Can3Tok: Canonical 3D Tokenization and Latent Modeling of Scene-Level 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Quankai and Georgiev, Iliyan and Wang, Tuanfeng Y. and Singh, Krishna Kumar and Neumann, Ulrich and Yoon, Jae Shin}, title = {Can3Tok: Canonical 3D Tokenization and Latent Modeling of Scene-Level 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9320-9331} }
EmotiCrafter: Text-to-Emotional-Image Generation based on Valence-Arousal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_ICCV, author = {Dang, Shengqi and He, Yi and Ling, Long and Qian, Ziqing and Zhao, Nanxuan and Cao, Nan}, title = {EmotiCrafter: Text-to-Emotional-Image Generation based on Valence-Arousal Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15218-15228} }
HFD-Teacher: High-Frequency Depth Distillation from Depth Foundation Models for Enhanced Depth Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhiyuan and Cheng, Anqi and Zhu, Haiyue and Li, Tianjiao and Tao, Pey Yuen and Mao, Kezhi}, title = {HFD-Teacher: High-Frequency Depth Distillation from Depth Foundation Models for Enhanced Depth Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8994-9003} }
EAMamba: Efficient All-Around Vision State Space Model for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Yu-Cheng and Xu, Yu-Syuan and Chen, Hao-Wei and Kuo, Hsien-Kai and Lee, Chun-Yi}, title = {EAMamba: Efficient All-Around Vision State Space Model for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11708-11719} }
AHCPTQ: Accurate and Hardware-Compatible Post-Training Quantization for Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wenlun and Zhong, Yunshan and Ando, Shimpei and Yoshioka, Kentaro}, title = {AHCPTQ: Accurate and Hardware-Compatible Post-Training Quantization for Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22383-22392} }
WonderTurbo: Generating Interactive 3D World in 0.72 Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2025_ICCV, author = {Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Wang, Weijie and Li, Haoyun and Zhao, Guosheng and Li, Jie and Qin, Wenkang and Huang, Guan and Mei, Wenjun}, title = {WonderTurbo: Generating Interactive 3D World in 0.72 Seconds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27423-27434} }
Learning Hierarchical Line Buffer for Image Processing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiacheng and Li, Feiran and Iso, Daisuke}, title = {Learning Hierarchical Line Buffer for Image Processing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11132-11141} }
Looking in the Mirror: A Faithful Counterfactual Explanation Method for Interpreting Deep Image Classification Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2025_ICCV, author = {Chowdhury, Townim and Phan, Vu Minh Hieu and Liao, Kewen and Dong, Nanyu and To, Minh-Son and van den Hengel, Anton and Verjans, Johan W. and Liao, Zhibin}, title = {Looking in the Mirror: A Faithful Counterfactual Explanation Method for Interpreting Deep Image Classification Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2239-2249} }
Inference-Time Diffusion Model Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Geon Yeong and Lee, Sang Wan and Ye, Jong Chul}, title = {Inference-Time Diffusion Model Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4049-4058} }
S3E: Self-Supervised State Estimation for Radar-Inertial System-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Shengpeng and Xie, Yulong and Liao, Qing and Wang, Wei}, title = {S3E: Self-Supervised State Estimation for Radar-Inertial System}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26686-26695} }
Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zheyuan and Tang, Weihao and Chen, Hong}, title = {Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12274-12283} }
Target Bias Is All You Need: Zero-Shot Debiasing of Vision-Language Models with Bias Corpus-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_ICCV, author = {Jang, Taeuk and Jung, Hoin and Wang, Xiaoqian}, title = {Target Bias Is All You Need: Zero-Shot Debiasing of Vision-Language Models with Bias Corpus}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1935-1946} }
Mastering Collaborative Multi-modal Data Selection: A Focus on Informativeness, Uniqueness, and Representativeness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Qifan and Shen, Zhebei and Yue, Zhongqi and Wu, Yang and Qin, Bosheng and Zhang, Wenqiao and Li, Yunfei and Li, Juncheng and Tang, Siliang and Zhuang, Yueting}, title = {Mastering Collaborative Multi-modal Data Selection: A Focus on Informativeness, Uniqueness, and Representativeness}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {155-165} }
WIR3D: Visually-Informed and Geometry-Aware 3D Shape Abstraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Richard and Fu, Daniel and Tan, Noah and Lang, Itai and Hanocka, Rana}, title = {WIR3D: Visually-Informed and Geometry-Aware 3D Shape Abstraction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14810-14821} }
XTrack: Multimodal Training Boosts RGB-X Video Object Trackers-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Yuedong and Wu, Zongwei and Fu, Yuqian and Zhou, Zhuyun and Sun, Guolei and Zamfir, Eduard and Ma, Chao and Paudel, Danda and Van Gool, Luc and Timofte, Radu}, title = {XTrack: Multimodal Training Boosts RGB-X Video Object Trackers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5734-5744} }
FaceCraft4D: Animated 3D Facial Avatar Generation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_ICCV, author = {Yin, Fei and R, Mallikarjun B and Yao, Chun-Han and Mantiuk, Rafal K. and Jampani, Varun}, title = {FaceCraft4D: Animated 3D Facial Avatar Generation from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11612-11621} }
GenFlow3D: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hanlin and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei}, title = {GenFlow3D: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27488-27497} }
Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Zihang and Gong, Boqing and Wang, Liqiang}, title = {Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19546-19556} }
PBCAT: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xiao and Zhu, Yiming and Huang, Yifan and Zhang, Wei and He, Yingzhe and Shi, Jie and Hu, Xiaolin}, title = {PBCAT: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24456-24466} }
TRCE: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Ruidong and Guo, Honglin and Wang, Lanjun and Zhang, Chenyu and Nie, Weizhi and Liu, An-An}, title = {TRCE: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18927-18936} }
PossLoss: A Reliable and Sensitive Facial Landmark Detection Loss Function-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Qikui}, title = {PossLoss: A Reliable and Sensitive Facial Landmark Detection Loss Function}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24858-24867} }
ChartPoint: Guiding MLLMs with Grounding Reflection for Chart Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zhengzhuo and Du, SiNan and Qi, Yiyan and Lu, Siwen and Xu, Chengjin and Yuan, Chun and Guo, Jian}, title = {ChartPoint: Guiding MLLMs with Grounding Reflection for Chart Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {426-436} }
StealthAttack: Robust 3D Gaussian Splatting Poisoning via Density-Guided Illusions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_ICCV, author = {Ke, Bo-Hsu and Xie, You-Zhe and Liu, Yu-Lun and Chiu, Wei-Chen}, title = {StealthAttack: Robust 3D Gaussian Splatting Poisoning via Density-Guided Illusions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27400-27411} }
LightCity: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jingjing and Hu, Qirui and Bao, Chong and Zhu, Yuke and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng}, title = {LightCity: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26477-26487} }
On the Generalization of Representation Uncertainty in Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kondylatos_2025_ICCV, author = {Kondylatos, Spyros and Bountos, Nikolaos Ioannis and Michail, Dimitrios and Zhu, Xiao Xiang and Camps-Valls, Gustau and Papoutsis, Ioannis}, title = {On the Generalization of Representation Uncertainty in Earth Observation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6552-6562} }
Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2025_ICCV, author = {Ouyang, Shuyi and Niu, Ziwei and Wang, Hongyi and Chen, Yen-Wei and Lin, Lanfen}, title = {Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24192-24202} }
Mind the Cost of Scaffold! Benign Clients May Even Become Accomplices of Backdoor Attack-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Xingshuo and Zhang, Xuanye and Lan, Xiang and Wang, Haozhao and Xu, Shengmin and Ren, Shen and Zeng, Jason and Wu, Ming and Heinrich, Michael and Zhang, Tianwei}, title = {Mind the Cost of Scaffold! Benign Clients May Even Become Accomplices of Backdoor Attack}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1580-1589} }
MV-Adapter: Multi-View Consistent Image Generation Made Easy-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Zehuan and Guo, Yuan-Chen and Wang, Haoran and Yi, Ran and Ma, Lizhuang and Cao, Yan-Pei and Sheng, Lu}, title = {MV-Adapter: Multi-View Consistent Image Generation Made Easy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16377-16387} }
Hybrid-Tower: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lan_2025_ICCV, author = {Lan, Bangxiang and Xie, Ruobing and Zhao, Ruixiang and Sun, Xingwu and Kang, Zhanhui and Yang, Gang and Li, Xirong}, title = {Hybrid-Tower: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24497-24506} }
ATLAS: Decoupling Skeletal and Shape Parameters for Expressive Parametric Human Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Jinhyung and Romero, Javier and Saito, Shunsuke and Prada, Fabian and Shiratori, Takaaki and Xu, Yichen and Bogo, Federica and Yu, Shoou-I and Kitani, Kris and Khirodkar, Rawal}, title = {ATLAS: Decoupling Skeletal and Shape Parameters for Expressive Parametric Human Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6508-6518} }
Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ba_2025_ICCV, author = {Ba, Ying and Zhang, Tianyu and Bai, Yalong and Mo, Wenyi and Liang, Tao and Su, Bing and Wen, Ji-Rong}, title = {Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19022-19031} }
PHATNet: A Physics-guided Haze Transfer Network for Domain-adaptive Real-world Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tsai_2025_ICCV, author = {Tsai, Fu-Jen and Peng, Yan-Tsung and Lin, Yen-Yu and Lin, Chia-Wen}, title = {PHATNet: A Physics-guided Haze Transfer Network for Domain-adaptive Real-world Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5591-5600} }
Diagnosing Pretrained Models for Out-of-distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xiong_2025_ICCV, author = {Xiong, Haipeng and Xu, Kai and Yao, Angela}, title = {Diagnosing Pretrained Models for Out-of-distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1836-1845} }
LD-RPS: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Huaqiu and Wang, Yong and Huang, Tongwen and Huang, Hailang and Wang, Haoqian and Chu, Xiangxiang}, title = {LD-RPS: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13684-13694} }
Outlier-Aware Post-Training Quantization for Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hailing and Lu, Jianglin and Zhang, Yitian and Fu, Yun}, title = {Outlier-Aware Post-Training Quantization for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16175-16184} }
Equipping Vision Foundation Model with Mixture of Experts for Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Shizhen and Liu, Jiahui and Wen, Xin and Tan, Haoru and Qi, Xiaojuan}, title = {Equipping Vision Foundation Model with Mixture of Experts for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1751-1761} }
The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Ho Kei and Schwing, Alexander}, title = {The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15875-15884} }
BlinkTrack: Feature Tracking over 80 FPS via Events and Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Yichen and Li, Yijin and Chen, Shuo and Li, Guanglin and Huang, Zhaoyang and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng}, title = {BlinkTrack: Feature Tracking over 80 FPS via Events and Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9298-9308} }
Less is More: Improving Motion Diffusion Models with Sparse Keyframes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2025_ICCV, author = {Bae, Jinseok and Hwang, Inwoo and Lee, Young-Yoon and Guo, Ziyu and Liu, Joseph and Ben-Shabat, Yizhak and Kim, Young Min and Kapadia, Mubbasir}, title = {Less is More: Improving Motion Diffusion Models with Sparse Keyframes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11069-11078} }
CAVIS: Context-Aware Video Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Seunghun and Seo, Jiwan and Han, Kiljoon and Choi, Minwoo and Im, Sunghoon}, title = {{CAVIS}: Context-Aware Video Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {4507--4517} }
LeanVAE: An Ultra-Efficient Reconstruction VAE for Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Yu and Yuan, Fajie}, title = {{LeanVAE}: An Ultra-Efficient Reconstruction {VAE} for Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15692--15702} }
Monocular Semantic Scene Completion via Masked Recurrent Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping}, title = {Monocular Semantic Scene Completion via Masked Recurrent Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24811--24822} }
TurboReg: TurboClique for Robust and Efficient Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Shaocheng and Shi, Pengcheng and Zhao, Zhenjun and Wang, Kaixin and Cao, Kuang and Wu, Ji and Li, Jiayuan}, title = {{TurboReg}: {TurboClique} for Robust and Efficient Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26371--26381} }
AR-VRM: Imitating Human Motions for Visual Robot Manipulation with Analogical Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Dejie and Zhao, Zijing and Liu, Yang}, title = {{AR-VRM}: Imitating Human Motions for Visual Robot Manipulation with Analogical Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {6818--6827} }
Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herbreteau_2025_ICCV, author = {Herbreteau, S{\'e}bastien and Unser, Michael}, title = {Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10496--10506} }
HccePose(BF): Predicting Front & Back Surfaces to Construct Ultra-Dense 2D-3D Correspondences for Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yulin and Hu, Mengting and Li, Hongli and Luo, Chen}, title = {{HccePose(BF)}: Predicting Front \& Back Surfaces to Construct Ultra-Dense {2D-3D} Correspondences for Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7166--7175} }
Find Any Part in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Ziqi and Yue, Yisong and Gkioxari, Georgia}, title = {Find Any Part in {3D}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7818--7827} }
CompleteMe: Reference-based Human Image Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tsai_2025_ICCV, author = {Tsai, Yu-Ju and Price, Brian and Liu, Qing and Figueroa, Luis and Pakhomov, Daniil and Ding, Zhihong and Cohen, Scott and Yang, Ming-Hsuan}, title = {{CompleteMe}: Reference-based Human Image Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18252--18261} }
MMOne: Representing Multiple Modalities in One Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_ICCV, author = {Gu, Zhifeng and Wang, Bing}, title = {{MMOne}: Representing Multiple Modalities in One Scene}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1088--1098} }
VA-MoE: Variables-Adaptive Mixture of Experts for Incremental Weather Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Hao and Tao, Han and Song, Guo and Zhang, Jie and Dong, Yonghan and Yu, Yunlong and Bai, Lei}, title = {{VA-MoE}: Variables-Adaptive Mixture of Experts for Incremental Weather Forecasting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7915--7924} }
2HandedAfforder: Learning Precise Actionable Bimanual Affordances from Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heidinger_2025_ICCV, author = {Heidinger, Marvin and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia}, title = {{2HandedAfforder}: Learning Precise Actionable Bimanual Affordances from Human Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14743--14753} }
GenDoP: Auto-regressive Camera Trajectory Generation as a Director of Photography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Mengchen and Wu, Tong and Tan, Jing and Liu, Ziwei and Wetzstein, Gordon and Lin, Dahua}, title = {{GenDoP}: Auto-regressive Camera Trajectory Generation as a Director of Photography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18229--18239} }
Latent Swap Joint Diffusion for 2D Long-Form Latent Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Yusheng and Wang, Chenxi and Li, Chang and Wang, Chen and Li, Kewei and Du, Jun and Sun, Lei and Gao, Jianqing and Wang, Ruoyu and Ma, Jiefeng}, title = {Latent Swap Joint Diffusion for {2D} Long-Form Latent Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11006--11015} }
Neural Inverse Rendering for High-Accuracy 3D Measurement of Moving Objects with Fewer Phase-Shifting Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Urakawa_2025_ICCV, author = {Urakawa, Yuki and Watanabe, Yoshihiro}, title = {Neural Inverse Rendering for High-Accuracy {3D} Measurement of Moving Objects with Fewer Phase-Shifting Patterns}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27692--27701} }
Embodied Navigation with Auxiliary Task of Action Description Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Kondoh_2025_ICCV, author = {Kondoh, Haru and Kanezaki, Asako}, title = {Embodied Navigation with Auxiliary Task of Action Description Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7025--7036} }
Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for MLLMs to Conquer the Unknown-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Bowen and Jiang, Zhouqiang and Susumu, Yasuaki and Miwa, Shotaro and Chen, Tianwei and Nakashima, Yuta}, title = {Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for {MLLMs} to Conquer the Unknown}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {4732--4742} }
From Image to Video: An Empirical Study of Diffusion Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Velez_2025_ICCV, author = {V{\'e}lez, Pedro and Polan{\'\i}a, Luisa F. and Yang, Yi and Zhang, Chuhan and Kabra, Rishabh and Arnab, Anurag and Sajjadi, Mehdi S. M.}, title = {From Image to Video: An Empirical Study of Diffusion Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16948--16958} }
Diffusion Curriculum: Synthetic-to-Real Data Curriculum via Image-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Yijun and Bhardwaj, Shweta and Zhou, Tianyi}, title = {Diffusion Curriculum: Synthetic-to-Real Data Curriculum via Image-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1697--1707} }
Dynamic-VLM: Simple Dynamic Visual Token Compression for VideoLLM-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Han and Nie, Yuxiang and Ye, Yongjie and Wang, Yanjie and Li, Shuai and Yu, Haiyang and Lu, Jinghui and Huang, Can}, title = {{Dynamic-VLM}: Simple Dynamic Visual Token Compression for {VideoLLM}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20812--20823} }
CCL-LGS: Contrastive Codebook Learning for 3D Language Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Lei and Li, Xiaomin and Ma, Liqian and Yin, Hao and Zheng, Zirui and Huang, Hefei and Li, Taiqing and Lu, Huchuan and Jia, Xu}, title = {{CCL-LGS}: Contrastive Codebook Learning for {3D} Language {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {9855--9864} }
TRACE: Learning 3D Gaussian Physical Dynamics from Multi-view Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jinxi and Song, Ziyang and Yang, Bo}, title = {{TRACE}: Learning {3D} {Gaussian} Physical Dynamics from Multi-view Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8820--8829} }
Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Rui and Mai, Huayu and Li, Wangkai and Chen, Yujia and Wang, Yuan}, title = {Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20357--20367} }
FreeCus: Free Lunch Subject-driven Customization in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yanbing and Wang, Zhe and Zhou, Qin and Yang, Mengping}, title = {{FreeCus}: Free Lunch Subject-driven Customization in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15521--15531} }
Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2025_ICCV, author = {Kumar, Pulkit and Huang, Shuaiyi and Walmer, Matthew and Rambhatla, Sai Saketh and Shrivastava, Abhinav}, title = {Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13544--13556} }
MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo}, title = {{MotionStreamer}: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10086--10096} }
RS-vHeat: Heat Conduction Guided Efficient Remote Sensing Foundation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Huiyang and Wang, Peijin and Bi, Hanbo and Tong, Boyuan and Wang, Zhaozhi and Diao, Wenhui and Chang, Hao and Feng, Yingchao and Zhang, Ziqi and Wang, Yaowei and Ye, Qixiang and Fu, Kun and Sun, Xian}, title = {{RS-vHeat}: Heat Conduction Guided Efficient Remote Sensing Foundation Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {9876--9887} }
EA-Vit: Efficient Adaptation for Elastic Vision Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Chen and Zhao, Wangbo and Zhang, Huiwen and Zhou, Yuhao and Tang, Weidong and Wang, Shuo and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Wang, Kai and Yang, Dawei}, title = {{EA-Vit}: Efficient Adaptation for Elastic Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1038--1047} }
Aether: Geometric-Aware Unified World Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Haoyi and Wang, Yifan and Zhou, Jianjun and Chang, Wenzheng and Zhou, Yang and Li, Zizun and Chen, Junyi and Shen, Chunhua and Pang, Jiangmiao and He, Tong}, title = {Aether: Geometric-Aware Unified World Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8535--8546} }
ArtEditor: Learning Customized Instructional Image Editor from Few-Shot Examples-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Shijie and Song, Yiren and Zhang, Yuxuan and Guo, Hailong and Wang, Xueyin and Liu, Jiaming}, title = {{ArtEditor}: Learning Customized Instructional Image Editor from Few-Shot Examples}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17651--17662} }
LiON-LoRA: Rethinking LoRA Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yisu and Cao, Chenjie and Yu, Chaohui and Zhu, Jianke}, title = {{LiON-LoRA}: Rethinking {LoRA} Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14569--14579} }
VLIPP: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Xindi and Li, Baolu and Zhang, Yiming and Yin, Zhenfei and Bai, Lei and Ma, Liqian and Wang, Zhiyong and Cai, Jianfei and Wong, Tien-Tsin and Lu, Huchuan and Jia, Xu}, title = {{VLIPP}: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12360--12370} }
SAM4D: Segment Anything in Camera and LiDAR Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jianyun and Wang, Song and Ni, Ziqian and Hu, Chunyong and Yang, Sheng and Zhu, Jianke and Li, Qiang}, title = {{SAM4D}: Segment Anything in Camera and {LiDAR} Streams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28535--28545} }
DreamRelation: Relation-Centric Video Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Yujie and Zhang, Shiwei and Yuan, Hangjie and Gong, Biao and Tang, Longxiang and Wang, Xiang and Qiu, Haonan and Li, Hengjia and Tan, Shuai and Zhang, Yingya and Shan, Hongming}, title = {{DreamRelation}: Relation-Centric Video Customization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12381--12393} }
DropletVideo: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Runze and Du, Guoguang and Li, Xiaochuan and Jia, Qi and Jin, Liang and Liu, Lu and Wang, Jingjing and Xu, Cong and Guo, Zhenhua and Zhao, Yaqian and Gong, Xiaoli and Li, Rengang and Fan, Baoyu}, title = {{DropletVideo}: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15583--15593} }
TAB: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahmanzadehgervi_2025_ICCV, author = {Rahmanzadehgervi, Pooyan and Nguyen, Hung Huy and Liu, Rosanne and Mai, Long and Nguyen, Anh Totti}, title = {{TAB}: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22551--22562} }
Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Guanjie and Zhao, Xinyu and Zhou, Yucheng and Qu, Xiaoye and Chen, Tianlong and Cheng, Yu}, title = {Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17708--17718} }
Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models-
[pdf]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Chang and Da, Feipeng and Zhang, Zilei}, title = {Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26559--26568} }
FLOAT: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ki_2025_ICCV, author = {Ki, Taekyung and Min, Dongchan and Chae, Gyeongsu}, title = {{FLOAT}: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14699--14710} }
Membership Inference Attacks with False Discovery Rate Control-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Chenxu and Qian, Wei and Chen, Aobo and Huai, Mengdi}, title = {Membership Inference Attacks with False Discovery Rate Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1216--1227} }
EmbodiedSplat: Personalized Real-to-Sim-to-Real Navigation with Gaussian Splats from a Mobile Device-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chhablani_2025_ICCV, author = {Chhablani, Gunjan and Ye, Xiaomeng and Irshad, Muhammad Zubair and Kira, Zsolt}, title = {{EmbodiedSplat}: Personalized Real-to-Sim-to-Real Navigation with {Gaussian} Splats from a Mobile Device}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25431--25441} }
Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection-
[pdf]
[supp]
[bibtex]@InProceedings{Delic_2025_ICCV, author = {Deli{\'c}, Anja and Grcic, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a}, title = {Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11579--11589} }
SynCity: Training-Free Generation of 3D Worlds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Engstler_2025_ICCV, author = {Engstler, Paul and Shtedritski, Aleksandar and Laina, Iro and Rupprecht, Christian and Vedaldi, Andrea}, title = {{SynCity}: Training-Free Generation of {3D} Worlds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27585--27595} }
Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV, author = {Wong, Conghao and Zou, Ziqian and Xia, Beihao}, title = {Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25788--25799} }
Adversarial Training for Probabilistic Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yi and Chen, Yuhang and Chen, Zhen and Ruan, Wenjie and Huang, Xiaowei and Khastgir, Siddartha and Zhao, Xingyu}, title = {Adversarial Training for Probabilistic Robustness}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1675--1685} }
MUNBa: Machine Unlearning via Nash Bargaining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jing and Harandi, Mehrtash}, title = {{MUNBa}: Machine Unlearning via {Nash} Bargaining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {4754--4765} }
DLF: Extreme Image Compression with Dual-generative Latent Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zhang, Yuan and Lu, Yan}, title = {{DLF}: Extreme Image Compression with Dual-generative Latent Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19227--19236} }
PanoLlama: Generating Endless and Coherent Panoramas with Next-Token-Prediction LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Teng and Zhang, Xiaoyu and Tang, Yongchuan}, title = {{PanoLlama}: Generating Endless and Coherent Panoramas with Next-Token-Prediction {LLMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15340--15349} }
Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Koneputugodage_2025_ICCV, author = {Koneputugodage, Chamin Hewa and Campbell, Dylan and Gould, Stephen}, title = {Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26116--26125} }
VQ-SGen: A Vector Quantized Stroke Representation for Creative Sketch Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiawei and Cui, Zhiming and Li, Changjian}, title = {{VQ-SGen}: A Vector Quantized Stroke Representation for Creative Sketch Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19311--19320} }
Cross-Category Subjectivity Generalization for Style-Adaptive Sketch Re-ID-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Zechao and Yang, Zhengwei and Li, Hao and Wang, Zheng and Zou, Yixiong}, title = {Cross-Category Subjectivity Generalization for Style-Adaptive Sketch {Re-ID}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22644--22653} }
VolumetricSMPL: A Neural Volumetric Body Model for Efficient Interactions, Contacts, and Collisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mihajlovic_2025_ICCV, author = {Mihajlovic, Marko and Zhang, Siwei and Li, Gen and Zhao, Kaifeng and Muller, Lea and Tang, Siyu}, title = {{VolumetricSMPL}: A Neural Volumetric Body Model for Efficient Interactions, Contacts, and Collisions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {5060--5070} }
Spatial-Temporal Forgery Trace based Forgery Image Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yilin and Feng, Zunlei and Wang, Jiachi and Lou, Hengrui and Zhou, Binjia and Lei, Jie and Song, Mingli and Bei, Yijun}, title = {Spatial-Temporal Forgery Trace based Forgery Image Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17067--17076} }
Passing the Driving Knowledge Test-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Maolin and Liu, Wanzhou and Ohn-Bar, Eshed}, title = {Passing the Driving Knowledge Test}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8395--8406} }
OneGT: One-Shot Geometry-Texture Neural Rendering for Head Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jinshu and Li, Bingchuan and Zhang, Fan and Zhao, Songtao and He, Qian}, title = {{OneGT}: One-Shot Geometry-Texture Neural Rendering for Head Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11294--11304} }
Joint Asymmetric Loss for Learning with Noisy Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jialiang and Liu, Xianming and Zhou, Xiong and Hu, Gangfeng and Zhai, Deming and Jiang, Junjun and Ji, Xiangyang}, title = {Joint Asymmetric Loss for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1947--1956} }
A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Xiaoyi and Huang, Tao and Wang, Peng and Huang, Zizhou and Haihang, Zhang and Zou, Yuntao and Li, Dagang and Zou, Kaifeng}, title = {A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19301--19310} }
Uncalibrated Structure from Motion on a Sphere-
[pdf]
[supp]
[bibtex]@InProceedings{Ventura_2025_ICCV, author = {Ventura, Jonathan and Larsson, Viktor and Kahl, Fredrik}, title = {Uncalibrated Structure from Motion on a Sphere}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {69--78} }
CARL: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan}, title = {{CARL}: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23019--23029} }
An OpenMind for 3D Medical Vision Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wald_2025_ICCV, author = {Wald, Tassilo and Ulrich, Constantin and Suprijadi, Jonathan and Ziegler, Sebastian and Nohel, Michal and Peretzke, Robin and Kohler, Gregor and Maier-Hein, Klaus}, title = {An {OpenMind} for {3D} Medical Vision Self-supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23839--23879} }
Online Dense Point Tracking with Streaming Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Qiaole and Fu, Yanwei}, title = {Online Dense Point Tracking with Streaming Memory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8710--8720} }
MCID: Multi-aspect Copyright Infringement Detection for Generated Images-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Chuanwei and Jia, Zexi and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Duan, Xiaoyue and Zhang, Jinchao and Zhou, Jie}, title = {{MCID}: Multi-aspect Copyright Infringement Detection for Generated Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16154--16164} }
Any2AnyTryon: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Hailong and Zeng, Bohan and Song, Yiren and Zhang, Wentao and Liu, Jiaming and Zhang, Chuang}, title = {{Any2AnyTryon}: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19085--19096} }
STEP-DETR: Advancing DETR-based Semi-Supervised Object Detection with Super Teacher and Pseudo-Label Guided Text Queries-
[pdf]
[supp]
[bibtex]@InProceedings{Shehzadi_2025_ICCV, author = {Shehzadi, Tahira and Hashmi, Khurram Azeem and Sarode, Shalini and Stricker, Didier and Afzal, Muhammad Zeshan}, title = {{STEP-DETR}: Advancing {DETR-based} Semi-Supervised Object Detection with Super Teacher and Pseudo-Label Guided Text Queries}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {3069--3079} }
DISTA-Net: Dynamic Closely-Spaced Infrared Small Target Unmixing-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Shengdong and Yang, Shangdong and Li, Yuxuan and Zhang, Xin and Li, Xiang and Yang, Jian and Cheng, Ming-Ming and Dai, Yimian}, title = {{DISTA-Net}: Dynamic Closely-Spaced Infrared Small Target Unmixing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14655--14664} }
MDD: A Dataset for Text-and-Music Conditioned Duet Dance Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gupta_2025_ICCV, author = {Gupta, Prerit and Fotso-Puepi, Jason Alexander and Li, Zhengyuan and Mehta, Jay and Bera, Aniket}, title = {{MDD}: A Dataset for Text-and-Music Conditioned Duet Dance Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13932--13941} }
Met2Net: A Decoupled Two-Stage Spatio-Temporal Forecasting Model for Complex Meteorological Systems-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shaohan and Yang, Hao and Chen, Min and Qin, Xiaolin}, title = {{Met2Net}: A Decoupled Two-Stage Spatio-Temporal Forecasting Model for Complex Meteorological Systems}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {5458--5468} }
SVG-Head: Hybrid Surface-Volumetric Gaussians for High-Fidelity Head Reconstruction and Real-Time Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Heyi and Wang, Cong and Xu, Tian-Xing and Huang, Jingwei and Kang, Di and Guo, Chunchao and Zhang, Song-Hai}, title = {SVG-Head: Hybrid Surface-Volumetric Gaussians for High-Fidelity Head Reconstruction and Real-Time Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13326-13335} }
Dynamic Dictionary Learning for Remote Sensing Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Xuechao and Li, Yue and Zhang, Shun and Li, Kai and Wang, Shiying and Tao, Pin and Xing, Junliang and Lang, Congyan}, title = {Dynamic Dictionary Learning for Remote Sensing Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22457-22466} }
Video Motion Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Haiyang and Xu, Zhan and Hong, Fa-Ting and Huang, Hsin-Ping and Zhou, Yi and Zhou, Yang}, title = {Video Motion Graphs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13730-13740} }
Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Marouf_2025_ICCV, author = {Marouf, Imad Eddine and Tartaglione, Enzo and Lathuili\`ere, St\'ephane and Van De Weijer, Joost}, title = {Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18078-18089} }
Dense Policy: Bidirectional Autoregressive Learning of Actions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Yue and Zhan, Xinyu and Fang, Hongjie and Xue, Han and Fang, Hao-Shu and Li, Yong-Lu and Lu, Cewu and Yang, Lixin}, title = {Dense Policy: Bidirectional Autoregressive Learning of Actions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14486-14495} }
Conditional Visual Autoregressive Modeling for Pathological Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ziyi and Xu, Zhe and Ma, Jiabo and Li, Wenqiang and Wang, Ruixuan and Du, Bo and Chen, Hao}, title = {Conditional Visual Autoregressive Modeling for Pathological Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17828-17837} }
BlueNeg: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Hanyuan and Li, Chengze and Xie, Minshan and Wang, Zhenni and Liang, Jiawen and Leung, Chi-Sing and Wong, Tien-Tsin}, title = {BlueNeg: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13119-13128} }
egoPPG: Heart Rate Estimation from Eye-Tracking Cameras in Egocentric Systems to Benefit Downstream Vision Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Braun_2025_ICCV, author = {Braun, Bj\"orn and Armani, Rayan and Meier, Manuel and Moebus, Max and Holz, Christian}, title = {egoPPG: Heart Rate Estimation from Eye-Tracking Cameras in Egocentric Systems to Benefit Downstream Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5579-5590} }
Beyond Pixel Uncertainty: Bounding the OoD Objects in Road Scenes-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Huachao and Liu, Zelong and Sun, Zhichao and Zou, Yuda and Xia, Gui-Song and Xu, Yongchao}, title = {Beyond Pixel Uncertainty: Bounding the OoD Objects in Road Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8472-8481} }
Gain-MLP: Improving HDR Gain Map Encoding via a Lightweight MLP-
[pdf]
[supp]
[bibtex]@InProceedings{Canham_2025_ICCV, author = {Canham, Trevor D. and Tedla, SaiKiran and Murdoch, Michael J. and Brown, Michael S.}, title = {Gain-MLP: Improving HDR Gain Map Encoding via a Lightweight MLP}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18619-18628} }
MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan}, title = {MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26294-26305} }
Learning to See in the Extremely Dark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Hai and Guan, Binhao and Liu, Zhen and Liu, Xiaohong and Yu, Jian and Liu, Zheng and Han, Songchen and Liu, Shuaicheng}, title = {Learning to See in the Extremely Dark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7676-7685} }
StyleMotif: Multi-Modal Motion Stylization using Style-Content Cross Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Ziyu and Lee, Young Yoon and Liu, Joseph and Ben-Shabat, Yizhak and Zordan, Victor and Kapadia, Mubbasir}, title = {StyleMotif: Multi-Modal Motion Stylization using Style-Content Cross Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13349-13359} }
SpecGuard: Spectral Projection-based Advanced Invisible Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alam_2025_ICCV, author = {Alam, Inzamamul and Islam, Md Tanvir and Woo, Simon S. and Muhammad, Khan}, title = {SpecGuard: Spectral Projection-based Advanced Invisible Watermarking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17984-17993} }
Scaling Laws for Native Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shukor_2025_ICCV, author = {Shukor, Mustafa and Fini, Enrico and da Costa, Victor Guilherme Turrisi and Cord, Matthieu and Susskind, Joshua and El-Nouby, Alaaeldin}, title = {Scaling Laws for Native Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12-23} }
Bridging Diffusion Models and 3D Representations: A 3D Consistent Super-Resolution Framework-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yi-Ting and Liao, Ting-Hsuan and Guo, Pengsheng and Schwing, Alexander and Huang, Jia-Bin}, title = {Bridging Diffusion Models and 3D Representations: A 3D Consistent Super-Resolution Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13481-13490} }
MagicID: Hybrid Preference Optimization for ID-Consistent and Dynamic-Preserved Video Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hengjia and Jiang, Lifan and Xiao, Xi and Wang, Tianyang and Yi, Hongwei and Wu, Boxi and Cai, Deng}, title = {MagicID: Hybrid Preference Optimization for ID-Consistent and Dynamic-Preserved Video Customization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12737-12746} }
LookOut: Real-World Humanoid Egocentric Navigation-
[pdf]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Boxiao and Harley, Adam W. and Engelmann, Francis and Liu, C. Karen and Guibas, Leonidas J.}, title = {LookOut: Real-World Humanoid Egocentric Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24977-24988} }
Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niedermayr_2025_ICCV, author = {Niedermayr, Simon and Neuhauser, Christoph and Westermann, R\"udiger}, title = {Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25862-25871} }
Motion-2-to-3: Leveraging 2D Motion Data for 3D Motion Generations-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Ruoxi and Pi, Huaijin and Shen, Zehong and Shuai, Qing and Hu, Zechen and Wang, Zhumei and Dong, Yajiao and Hu, Ruizhen and Komura, Taku and Peng, Sida and Zhou, Xiaowei}, title = {Motion-2-to-3: Leveraging 2D Motion Data for 3D Motion Generations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14305-14316} }
Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Tianyu and Xiong, Shengwu and Yao, Ruilin and Rong, Yi}, title = {Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20561-20571} }
Large Scene Generation with Cube-Absorb Discrete Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Qianjiang and Hu, Wei}, title = {Large Scene Generation with Cube-Absorb Discrete Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25186-25196} }
Advancing Text-to-3D Generation with Linearized Lookahead Variational Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Yu and Liu, Bingde and Xie, Qingsong and Lu, Haonan and Deng, Zhijie}, title = {Advancing Text-to-3D Generation with Linearized Lookahead Variational Score Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19567-19576} }
MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Guan and Zhang, Jianfeng}, title = {MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26336-26348} }
Memory-Efficient 4-bit Preconditioned Stochastic Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jingyang and Ding, Kuangyu and Toh, Kim-Chuan and Zhou, Pan}, title = {Memory-Efficient 4-bit Preconditioned Stochastic Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22633-22643} }
On the Recovery of Cameras from Fundamental Matrices-
[pdf]
[supp]
[bibtex]@InProceedings{Madhavan_2025_ICCV, author = {Madhavan, Rakshith and Arrigoni, Federica}, title = {On the Recovery of Cameras from Fundamental Matrices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20934-20943} }
Not All Frame Features Are Equal: Video-to-4D Generation via Decoupling Dynamic-Static Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Liying and Liu, Chen and Zhu, Zhenwei and Liu, Ajian and Ma, Hui and Nong, Jian and Liang, Yanyan}, title = {Not All Frame Features Are Equal: Video-to-4D Generation via Decoupling Dynamic-Static Features}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7494-7504} }
FedDifRC: Unlocking the Potential of Text-to-Image Diffusion Models in Heterogeneous Federated Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Huan and Li, Haoran and Chen, Huaming and Yan, Jun and Shi, Jiahua and Shen, Jun}, title = {FedDifRC: Unlocking the Potential of Text-to-Image Diffusion Models in Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3726-3736} }
Vivid4D: Improving 4D Reconstruction from Monocular Video by Video Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Jiaxin and Miao, Sheng and Yang, Bangbang and Ma, Yuewen and Liao, Yiyi}, title = {Vivid4D: Improving 4D Reconstruction from Monocular Video by Video Inpainting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12592-12604} }
Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Endo_2025_ICCV, author = {Endo, Mark and Wang, Xiaohan and Yeung-Levy, Serena}, title = {Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22826-22835} }
RePoseD: Efficient Relative Pose Estimation With Known Depth Information-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Yaqing and Kocur, Viktor and V\'avra, V\'aclav and Haladov\'a, Zuzana Berger and Yang, Jian and Sattler, Torsten and Kukelova, Zuzana}, title = {RePoseD: Efficient Relative Pose Estimation With Known Depth Information}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14876-14886} }
MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xinjie and Liu, Zhening and Zhang, Yifan and Ge, Xingtong and He, Dailan and Xu, Tongda and Wang, Yan and Lin, Zehong and Yan, Shuicheng and Zhang, Jun}, title = {MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27828-27838} }
Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2025_ICCV, author = {Qian, Zefeng and Yao, Xincheng and Huang, Yifei and Zhang, Chongyang and Ying, Jiangyong and Sun, Hong}, title = {Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10421-10431} }
VoluMe - Authentic 3D Video Calls from Live Gaussian Splat Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{de_La_Gorce_2025_ICCV, author = {de La Gorce, Martin and Hewitt, Charlie and Tak\'acs, Tibor and Gerdisch, Robert and Hosenie, Zafiirah and Meishvili, Givi and Kowalski, Marek and Cashman, Thomas J. and Criminisi, Antonio}, title = {VoluMe - Authentic 3D Video Calls from Live Gaussian Splat Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13783-13792} }
Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Bozhong and Gan, Jinye and Xu, Xiaohao and Chen, Xintao and Li, Wenqiao and Huang, Xiaonan and Ni, Na and Wu, Yingna}, title = {Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27063-27072} }
MonoMobility: Zero-Shot 3D Mobility Analysis from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hongyi and Guo, Yulan and Wang, Xiaogang and Xu, Kai}, title = {MonoMobility: Zero-Shot 3D Mobility Analysis from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8800-8809} }
Prior-aware Dynamic Temporal Modeling Framework for Sequential 3D Hand Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Liu, Xingyu and Zhang, Menghao and Zhang, Lei and Wang, Jing and Liao, Jianxin}, title = {Prior-aware Dynamic Temporal Modeling Framework for Sequential 3D Hand Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6476-6487} }
Generate, Refine, and Encode: Leveraging Synthesized Novel Samples for On-the-Fly Fine-Grained Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xiao and Pu, Nan and Zheng, Haiyang and Li, Wenjing and Sebe, Nicu and Zhong, Zhun}, title = {Generate, Refine, and Encode: Leveraging Synthesized Novel Samples for On-the-Fly Fine-Grained Category Discovery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1078-1087} }
Visual-RFT: Visual Reinforcement Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ziyu and Sun, Zeyi and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi}, title = {Visual-RFT: Visual Reinforcement Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2034-2044} }
Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_ICCV, author = {Pei, Muleilan and Shi, Shaoshuai and Chen, Xuesong and Liu, Xu and Shen, Shaojie}, title = {Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28303-28312} }
Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Makabe_2025_ICCV, author = {Makabe, Lilika and Santo, Hiroaki and Okura, Fumio and Brown, Michael S. and Matsushita, Yasuyuki}, title = {Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27252-27261} }
DISTIL: Data-Free Inversion of Suspicious Trojan Inputs via Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mirzaei_2025_ICCV, author = {Mirzaei, Hossein and Taghavi, Zeinab and Rezaee, Sepehr and Hadi, Masoud and Madadi, Moein and Mathis, Mackenzie W.}, title = {DISTIL: Data-Free Inversion of Suspicious Trojan Inputs via Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3194-3205} }
Moment Quantization for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Xiaolong and Wang, Le and Zhou, Sanping and Shi, Liushuai and Xia, Kun and Liu, Mengnan and Wang, Yabing and Hua, Gang}, title = {Moment Quantization for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20137-20146} }
ADIEE: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Sherry X. and Wei, Yi and Zhou, Luowei and Kumar, Suren}, title = {ADIEE: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18345-18356} }
Dynamic Reconstruction of Hand-Object Interaction with Distributed Force-aware Contact Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Zhenjun and Xu, Wenqiang and Xie, Pengfei and Li, Yutong and Anthony, Brian W. and Zhang, Zhuorui and Lu, Cewu}, title = {Dynamic Reconstruction of Hand-Object Interaction with Distributed Force-aware Contact Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8590-8599} }
CSD-VAR: Content-Style Decomposition in Visual Autoregressive Models-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_ICCV, author = {Nguyen, Quang-Binh and Luu, Minh and Nguyen, Quang and Tran, Anh and Nguyen, Khoi}, title = {CSD-VAR: Content-Style Decomposition in Visual Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17013-17023} }
MSQ: Memory-Efficient Bit Sparsification Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Seokho and Yoon, Seoyeon and Kim, Jinhee and Wang, Dongwei and Jeon, Kang Eun and Yang, Huanrui and Ko, Jong Hwan}, title = {MSQ: Memory-Efficient Bit Sparsification Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21885-21894} }
Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Junhao_2025_ICCV, author = {Junhao, Wei and Zhe, Yu and Sakuma, Jun}, title = {Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17698-17707} }
VSSD: Vision Mamba with Non-Causal State Space Duality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Yuheng and Li, Mingjia and Dong, Minjing and Xu, Chang}, title = {VSSD: Vision Mamba with Non-Causal State Space Duality}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10819-10829} }
Epipolar Consistent Attention Aggregation Network for Unsupervised Light Field Disparity Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Chen and Zhang, Shuo and Lin, Youfang}, title = {Epipolar Consistent Attention Aggregation Network for Unsupervised Light Field Disparity Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6488-6497} }
LLaVA-KD: A Framework of Distilling Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Yuxuan and Zhang, Jiangning and He, Haoyang and He, Xinwei and Tong, Ao and Gan, Zhenye and Wang, Chengjie and Xue, Zhucun and Liu, Yong and Bai, Xiang}, title = {LLaVA-KD: A Framework of Distilling Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {239-249} }
CLIP-GS: Unifying Vision-Language Representation with 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Jiao_2025_ICCV, author = {Jiao, Siyu and Dong, Haoye and Yin, Yuyang and Jie, Zequn and Qian, Yinlong and Zhao, Yao and Shi, Humphrey and Wei, Yunchao}, title = {CLIP-GS: Unifying Vision-Language Representation with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4670-4680} }
Region-Level Data Attribution for Text-to-Image Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_ICCV, author = {Nguyen, Trong Bang and Le Nguyen, Phi and Lucey, Simon and Hoai, Minh}, title = {Region-Level Data Attribution for Text-to-Image Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18825-18833} }
TAPNext: Tracking Any Point (TAP) as Next Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zholus_2025_ICCV, author = {Zholus, Artem and Doersch, Carl and Yang, Yi and Koppula, Skanda and Patraucean, Viorica and He, Xu Owen and Rocco, Ignacio and Sajjadi, Mehdi S. M. and Chandar, Sarath and Goroshin, Ross}, title = {TAPNext: Tracking Any Point (TAP) as Next Token Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9693-9703} }
Learnable Feature Patches and Vectors for Boosting Low-light Image Enhancement without External Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Xiaogang and Wu, Jiafei and Yan, Qingsen and Cui, Jiequan and Hong, Richang and Yu, Bei}, title = {Learnable Feature Patches and Vectors for Boosting Low-light Image Enhancement without External Knowledge}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7761-7770} }
Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahng_2025_ICCV, author = {Bahng, Hyojin and Chan, Caroline and Durand, Fredo and Isola, Phillip}, title = {Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22934-22946} }
Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Peijun and Kong, Chenqi and Yang, Siyuan and Shao, Zihao and Jiang, Xinghao and Ng, Boon Poh and Er, Meng Hwa and Kot, Alex}, title = {Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20541-20550} }
BANet: Bilateral Aggregation Network for Mobile Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Gangwei and Liu, Jiaxin and Wang, Xianqi and Cheng, Junda and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin}, title = {BANet: Bilateral Aggregation Network for Mobile Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28870-28880} }
Diffuman4D: 4D Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Yudong and Peng, Sida and Wang, Xuan and Xie, Tao and Xu, Zhen and Yang, Yifan and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei}, title = {Diffuman4D: 4D Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11047-11057} }
AnnofreeOD: Detecting All Classes at Low Frame Rates Without Human Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Boyi and Liu, Yuhang and He, Houxin and Tian, Yonglin and Wang, Fei-Yue}, title = {AnnofreeOD: Detecting All Classes at Low Frame Rates Without Human Annotations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5315-5325} }
AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Yiwu and Liu, Zhuoming and Li, Yin and Wang, Liwei}, title = {AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20180-20192} }
PanSt3R: Multi-view Consistent Panoptic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zust_2025_ICCV, author = {Zust, Lojze and Cabon, Yohann and Marrie, Juliette and Antsfeld, Leonid and Chidlovskii, Boris and Revaud, Jerome and Csurka, Gabriela}, title = {PanSt3R: Multi-view Consistent Panoptic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5856-5866} }
G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Tao_2025_ICCV, author = {Tao, Chengyu and Cao, Xuanming and Du, Juan}, title = {G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20551-20560} }
SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Chen and Wang, Zhirui and Sheng, Taowei and Jiang, Yi and Li, Yundu and Cheng, Peirui and Zhang, Luning and Chen, Kaiqiang and Hu, Yanfeng and Yang, Xue and Sun, Xian}, title = {SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27021-27030} }
Generic Event Boundary Detection via Denoising Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2025_ICCV, author = {Hwang, Jaejun and Gong, Dayoung and Kim, Manjin and Cho, Minsu}, title = {Generic Event Boundary Detection via Denoising Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14084-14094} }
ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Yuanhe and Xie, Linxi and Chen, Zhuoran and Yu, Kangrui and Po, Ryan and Yang, Guandao and Wetzstein, Gordon and Wen, Hongyi}, title = {ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19577-19586} }
Implicit Counterfactual Learning for Audio-Visual Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zha_2025_ICCV,
  author    = {Zha, Mingfeng and Li, Tianyu and Wang, Guoqing and Wang, Peng and Wu, Yangyang and Yang, Yang and Shen, Heng Tao},
  title     = {Implicit Counterfactual Learning for Audio-Visual Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22349--22360},
}
Context Guided Transformer Entropy Modeling for Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Junlong and Zhang, Wei and Jin, Yaohui and Shen, Xiaoyu},
  title     = {Context Guided Transformer Entropy Modeling for Video Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18885--18894},
}
FlowEdit: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulikov_2025_ICCV,
  author    = {Kulikov, Vladimir and Kleiner, Matan and Huberman-Spiegelglas, Inbar and Michaeli, Tomer},
  title     = {{FlowEdit}: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19721--19730},
}
CMT: A Cascade MAR with Topology Predictor for Multimodal Conditional CAD Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jianyu and Wang, Yizhou and Yue, Xiangyu and Ma, Xinzhu and Guo, Jinyang and Zhou, Dongzhan and Ouyang, Wanli and Tang, Shixiang},
  title     = {{CMT}: A Cascade {MAR} with Topology Predictor for Multimodal Conditional {CAD} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7014--7024},
}
CLIPSym: Delving into Symmetry Detection with CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Tinghan and Rahman, Md Ashiqur and Yeh, Raymond A.},
  title     = {{CLIPSym}: Delving into Symmetry Detection with {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21003--21013},
}
MRGen: Segmentation Data Engine For Underrepresented MRI Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Haoning and Zhao, Ziheng and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {{MRGen}: Segmentation Data Engine For Underrepresented {MRI} Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19903--19913},
}
PrimHOI: Compositional Human-Object Interaction via Reusable Primitives-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Kai and Liu, Tengyu and Pei, Mingtao and Zhu, Yixin and Huang, Siyuan},
  title     = {{PrimHOI}: Compositional Human-Object Interaction via Reusable Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11491--11501},
}
Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Yi, Xuanyu and Weng, Haohan and Xu, Qingshan and Wei, Xiaokang and Yang, Xianghui and Guo, Chunchao and Chen, Long and Zhang, Hanwang},
  title     = {Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10961--10970},
}
What Changed and What Could Have Changed? State-Change Counterfactuals for Procedure-Aware Video Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Chi-Hsi and Ramirez, Frangil and Ha, Juhyung and Chen, Yi-Ting and Crandall, David and Tsai, Yi-Hsuan},
  title     = {What Changed and What Could Have Changed? {State}-Change Counterfactuals for Procedure-Aware Video Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12294--12306},
}
Accelerate 3D Object Detection Models via Zero-Shot Attention Key Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Lizhen and Bai, Xiuxiu and Jia, Xiaojun and Fang, Jianwu and Pang, Shanmin},
  title     = {Accelerate {3D} Object Detection Models via Zero-Shot Attention Key Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23085--23094},
}
Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Tao and Yang, Ziyao and Wang, Xingwu and Wang, Yi and Wang, Xuan and Sun, Feiman and Nandi, Asoke K.},
  title     = {Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21450--21459},
}
X-Dancer: Expressive Music to Human Dance Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zeyuan and Xu, Hongyi and Song, Guoxian and Xie, You and Zhang, Chenxu and Chen, Xin and Wang, Chao and Chang, Di and Luo, Linjie},
  title     = {{X-Dancer}: Expressive Music to Human Dance Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10602--10611},
}
IDF: Iterative Dynamic Filtering Networks for Generalizable Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongjin and Ko, Jaekyun and Ali, Muhammad Kashif and Kim, Tae Hyun},
  title     = {{IDF}: Iterative Dynamic Filtering Networks for Generalizable Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12180--12190},
}
LayerLock: Non-collapsing Representation Learning with Progressive Freezing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Erdogan_2025_ICCV,
  author    = {Erdogan, Goker and Parthasarathy, Nikhil and Ionescu, Catalin and Hudson, Drew A. and Lerchner, Alexander and Zisserman, Andrew and Sajjadi, Mehdi S. M. and Carreira, Joao},
  title     = {{LayerLock}: Non-collapsing Representation Learning with Progressive Freezing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19461--19470},
}
Augmenting Moment Retrieval: Zero-Dependency Two-Stage Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Zhengxuan and Tang, Jiajin and Yang, Sibei},
  title     = {Augmenting Moment Retrieval: Zero-Dependency Two-Stage Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3401--3412},
}
Derm1M: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Siyuan and Hu, Ming and Jiang, Yiwen and Li, Xieji and Fei, Hao and Tschandl, Philipp and Kittler, Harald and Ge, Zongyuan},
  title     = {{Derm1M}: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12681--12690},
}
Expressive Talking Human from Single-Image with Imperfect Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Jun and Guo, Yudong and Hu, Leipeng and Guo, Boyang and Yuan, Yancheng and Zhang, Juyong},
  title     = {Expressive Talking Human from Single-Image with Imperfect Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10398--10409},
}
DADM: Dual Alignment of Domain and Modality for Face Anti-spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jingyi and Lin, Xun and Yu, Zitong and Zhang, Liepiao and Liu, Xin and Li, Hui and Yuan, Xiaochen and Cao, Xiaochun},
  title     = {{DADM}: Dual Alignment of Domain and Modality for Face Anti-spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12045--12056},
}
IntroStyle: Training-Free Introspective Style Attribution using Diffusion Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Anand and Mu, Jiteng and Vasconcelos, Nuno},
  title     = {{IntroStyle}: Training-Free Introspective Style Attribution using Diffusion Features},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14909--14918},
}
Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chengxu and Qi, Lu and Pan, Jinshan and Qian, Xueming and Yang, Ming-Hsuan},
  title     = {Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14195--14204},
}
Oasis: One Image is All You Need for Multimodal Instruction Data Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Letian and Cui, Quan and Zhao, Bingchen and Yang, Cheng},
  title     = {Oasis: One Image is All You Need for Multimodal Instruction Data Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3542--3551},
}
Learning Neural Scene Representation from iToF Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Wenjie and Chang, Hanzhi and Zhang, Yueyi and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Learning Neural Scene Representation from {iToF} Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27937--27946},
}
GeometryCrafter: Consistent Geometry Estimation for Open-world Videos with Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Tian-Xing and Gao, Xiangjun and Hu, Wenbo and Li, Xiaoyu and Zhang, Song-Hai and Shan, Ying},
  title     = {{GeometryCrafter}: Consistent Geometry Estimation for Open-world Videos with Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6632--6644},
}
Multi-modal Multi-platform Person Re-Identification: Benchmark and Method-
[pdf]
[supp]
[bibtex]@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Ruiyang and Jiang, Songyi and Li, Bin and Pan, Bikang and Zhu, Yihang and Zhang, Junjie and Zhu, Xiatian and Gong, Shaogang and Wang, Jingya},
  title     = {Multi-modal Multi-platform Person Re-Identification: Benchmark and Method},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10251--10261},
}
CAP: Evaluation of Persuasive and Creative Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aghazadeh_2025_ICCV,
  author    = {Aghazadeh, Aysan and Kovashka, Adriana},
  title     = {{CAP}: Evaluation of Persuasive and Creative Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16970--16980},
}
SpatialTrackerV2: Advancing 3D Point Tracking with Explicit Camera Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yuxi and Wang, Jianyuan and Xue, Nan and Karaev, Nikita and Makarov, Yuri and Kang, Bingyi and Zhu, Xing and Bao, Hujun and Shen, Yujun and Zhou, Xiaowei},
  title     = {{SpatialTrackerV2}: Advancing {3D} Point Tracking with Explicit Camera Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6726--6737},
}
monoVLN: Bridging the Observation Gap between Monocular and Panoramic Vision and Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Renjie and Zhou, Yu and Cheng, Hao and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{monoVLN}: Bridging the Observation Gap between Monocular and Panoramic Vision and Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9477--9486},
}
Know "No" Better: A Data-Driven Approach for Enhancing Negation Awareness in CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Junsung and Lee, Jungbeom and Song, Jongyoon and Yu, Sangwon and Jung, Dahuin and Yoon, Sungroh},
  title     = {Know {``No''} Better: A Data-Driven Approach for Enhancing Negation Awareness in {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2825--2835},
}
Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in fMRI-to-Video Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haonan and Zhang, Qixiang and Wang, Lehan and Huang, Xuanqi and Li, Xiaomeng},
  title     = {Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in {fMRI}-to-Video Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18367--18376},
}
LACONIC: A 3D Layout Adapter for Controllable Image Creation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maillard_2025_ICCV,
  author    = {Maillard, L{\'e}opold and Durand, Tom and Rahary, Adrien Ramanana and Ovsjanikov, Maks},
  title     = {{LACONIC}: A {3D} Layout Adapter for Controllable Image Creation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18046--18057},
}
InfiniDreamer: Arbitrarily Long Human Motion Generation via Segment Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Wenjie and Ma, Fan and Fan, Hehe},
  title     = {{InfiniDreamer}: Arbitrarily Long Human Motion Generation via Segment Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14688--14698},
}
Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Yujia and Wang, Yuze and Yuan, Jingling and Hu, Chuang},
  title     = {Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20603--20612},
}
FairGen: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yilei and Li, Wei-Hong and Zhang, Yiyuan and Cai, Minghong and Yue, Xiangyu},
  title     = {{FairGen}: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18411--18420},
}
Gaze-Language Alignment for Zero-Shot Prediction of Visual Search Targets from Human Gaze Scanpaths-
[pdf]
[supp]
[bibtex]@InProceedings{Mondal_2025_ICCV,
  author    = {Mondal, Sounak and Sendhilnathan, Naveen and Zhang, Ting and Liu, Yue and Proulx, Michael and Iuzzolino, Michael Louis and Qin, Chuan and Jonker, Tanya R.},
  title     = {Gaze-Language Alignment for Zero-Shot Prediction of Visual Search Targets from Human Gaze Scanpaths},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2738--2749},
}
ReCoT: Reflective Self-Correction Training for Mitigating Confirmation Bias in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2025_ICCV,
  author    = {Qu, Mengxue and Hu, Yibo and Han, Kunyang and Wei, Yunchao and Zhao, Yao},
  title     = {{ReCoT}: Reflective Self-Correction Training for Mitigating Confirmation Bias in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9147--9157},
}
Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuqing and Lin, Zhijie and Teng, Yao and Zhu, Yuanzhi and Ren, Shuhuai and Feng, Jiashi and Liu, Xihui},
  title     = {Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18596--18605},
}
Trade-offs in Image Generation: How Do Different Dimensions Interact?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Sicheng and Xie, Binzhu and Yan, Zhonghao and Zhang, Yuli and Zhou, Donghao and Chen, Xiaofei and Qiu, Shi and Liu, Jiaqi and Xie, Guoyang and Lu, Zhichao},
  title     = {Trade-offs in Image Generation: How Do Different Dimensions Interact?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17256--17267},
}
From Easy to Hard: The MIR Benchmark for Progressive Interleaved Multi-Image Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Hang and Zhang, Jiayang and Nan, Guoshun and Deng, Wendi and Chen, Zhenyan and Zhang, Chenyang and Xiao, Wang and Huang, Shan and Pan, Yuqi and Qi, Tao and Leng, Sicong},
  title     = {From Easy to Hard: The {MIR} Benchmark for Progressive Interleaved Multi-Image Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {859--869},
}
Rethinking Layered Graphic Design Generation with a Top-Down Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jingye and Wang, Zhaowen and Zhao, Nanxuan and Zhang, Li and Liu, Difan and Yang, Jimei and Chen, Qifeng},
  title     = {Rethinking Layered Graphic Design Generation with a Top-Down Approach},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16861--16870},
}
SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yichi and Xue, Le and Zhang, Wenbo and Li, Lanlan and Liu, Yuchen and Jiang, Chen and Cheng, Yuan and Qi, Yuan},
  title     = {{SegAnyPET}: Universal Promptable Segmentation from Positron Emission Tomography Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21107--21116},
}
StolenLoRA: Exploring LoRA Extraction Attacks via Synthetic Data-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yixu and Teng, Yan and Wang, Yingchun and Ma, Xingjun},
  title     = {{StolenLoRA}: Exploring {LoRA} Extraction Attacks via Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {263--272},
}
The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Aoxiong and Tan, Xu and Shen, Kai and Leng, Yichong and Zhou, Xinyu and Li, Juncheng and Tang, Siliang},
  title     = {The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15604--15615},
}
SV4D 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality 4D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Chun-Han and Xie, Yiming and Voleti, Vikram and Jiang, Huaizu and Jampani, Varun},
  title     = {{SV4D} 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality {4D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13248--13258},
}
Long Context Tuning for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yuwei and Yang, Ceyuan and Yang, Ziyan and Ma, Zhibei and Lin, Zhijie and Yang, Zhenheng and Lin, Dahua and Jiang, Lu},
  title     = {Long Context Tuning for Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17281--17291},
}
TCFG: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration-
[pdf]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Xiaomeng and Li, Jia},
  title     = {{TCFG}: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18552--18562},
}
Removing Out-of-Focus Reflective Flares via Color Alignment-
[pdf]
[bibtex]@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Fengbo and Chen, Chang Wen},
  title     = {Removing Out-of-Focus Reflective Flares via Color Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9770--9779},
}
ILLUME: Illuminating Your LLMs to See, Draw, and Self-Enhance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chunwei and Lu, Guansong and Yang, Junwei and Huang, Runhui and Han, Jianhua and Hou, Lu and Zhang, Wei and Xu, Hang},
  title     = {{ILLUME}: Illuminating Your {LLMs} to See, Draw, and Self-Enhance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21612--21622},
}
No Pose at All: Self-Supervised Pose-Free 3D Gaussian Splatting from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ranran and Mikolajczyk, Krystian},
  title     = {No Pose at All: Self-Supervised Pose-Free {3D} {Gaussian} Splatting from Sparse Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27947--27957},
}
Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gokay_2025_ICCV,
  author    = {G{\"o}kay, Uzay and Spurio, Federico and Bach, Dominik R. and Gall, Juergen},
  title     = {Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12101--12111},
}
Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wenkui and Cao, Jie and Duan, Junxian and He, Ran},
  title     = {Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19290--19300},
}
EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-based Online Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yuqi and Zheng, Wenzhao and Zuo, Sicheng and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{EmbodiedOcc}: Embodied {3D} Occupancy Prediction for Vision-based Online Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26360--26370},
}
MAVias: Mitigate any Visual Bias-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarridis_2025_ICCV,
  author    = {Sarridis, Ioannis and Koutlis, Christos and Papadopoulos, Symeon and Diou, Christos},
  title     = {{MAVias}: Mitigate any Visual Bias},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1271--1281},
}
Calibrating MLLM-as-a-judge via Multimodal Bayesian Prompt Ensembles-
[pdf]
[supp]
[bibtex]@InProceedings{Slyman_2025_ICCV,
  author    = {Slyman, Eric and Tanjim, Mehrab and Kafle, Kushal and Lee, Stefan},
  title     = {Calibrating {MLLM}-as-a-judge via Multimodal {Bayesian} Prompt Ensembles},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17224--17234},
}
SCFlow: Implicitly Learning Style and Content Disentanglement with Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Pingchuan and Yang, Xiaopei and Li, Yusong and Gui, Ming and Krause, Felix and Schusterbauer, Johannes and Ommer, Bj{\"o}rn},
  title     = {{SCFlow}: Implicitly Learning Style and Content Disentanglement with Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14919--14929},
}
Lay-Your-Scene: Natural Scene Layout Generation with Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Srivastava_2025_ICCV,
  author    = {Srivastava, Divyansh and Zhang, Xiang and Wen, He and Wen, Chenru and Tu, Zhuowen},
  title     = {{Lay-Your-Scene}: Natural Scene Layout Generation with Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17909--17919},
}
DiST-4D: Disentangled Spatiotemporal Diffusion with Metric Depth for 4D Driving Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jiazhe and Ding, Yikang and Chen, Xiwu and Chen, Shuo and Li, Bohan and Zou, Yingshuang and Lyu, Xiaoyang and Tan, Feiyang and Qi, Xiaojuan and Li, Zhiheng and Zhao, Hao},
  title     = {{DiST-4D}: Disentangled Spatiotemporal Diffusion with Metric Depth for {4D} Driving Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27231--27241},
}
Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xuehan and Ren, Guangyu and Dai, Tianhong and Stathaki, Tania and Liu, Hengyan},
  title     = {Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20672--20682},
}
Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shida and Li, Yue and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25040--25049},
}
Contrastive Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stoica_2025_ICCV,
  author    = {Stoica, George and Ramanujan, Vivek and Fan, Xiang and Farhadi, Ali and Krishna, Ranjay and Hoffman, Judy},
  title     = {Contrastive Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1185--1194},
}
Multi-scenario Overlapping Text Segmentation with Depth Awareness-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Xie, Xudong and Liu, Yuliang and Bai, Xiang},
  title     = {Multi-scenario Overlapping Text Segmentation with Depth Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17454--17463},
}
SemiVisBooster: Boosting Semi-Supervised Learning for Fine-Grained Classification through Pseudo-Label Semantic Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenjin and Li, Xinyu and Gao, Chenyang and Marsic, Ivan},
  title     = {{SemiVisBooster}: Boosting Semi-Supervised Learning for Fine-Grained Classification through Pseudo-Label Semantic Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1195--1204},
}
Backdooring Self-Supervised Contrastive Learning by Noisy Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Tuo and Gui, Jie and Dong, Minjing and Jia, Ju and Fang, Lanting and Liu, Jian},
  title     = {Backdooring Self-Supervised Contrastive Learning by Noisy Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3684--3693},
}
VisualCloze: A Universal Image Generation Framework via Visual In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhong-Yu and Du, Ruoyi and Yan, Juncheng and Zhuo, Le and Li, Zhen and Gao, Peng and Ma, Zhanyu and Cheng, Ming-Ming},
  title     = {{VisualCloze}: A Universal Image Generation Framework via Visual In-Context Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18969--18979},
}
MagicHOI: Leveraging 3D Priors for Accurate Hand-object Reconstruction from Short Monocular Video Clips-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shibo and He, Haonan and Parelli, Maria and Gebhardt, Christoph and Fan, Zicong and Song, Jie},
  title     = {{MagicHOI}: Leveraging {3D} Priors for Accurate Hand-object Reconstruction from Short Monocular Video Clips},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5957--5968},
}
ReMP-AD: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Hongchi and Yang, Guanglei and Zhao, Debin and Ji, Yanli and Zuo, Wangmeng},
  title     = {{ReMP-AD}: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20425--20434},
}
Trial-Oriented Visual Rearrangement-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuyi and Song, Xinhang and Qi, Tianliang and Jiang, Shuqiang},
  title     = {Trial-Oriented Visual Rearrangement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8022--8031},
}
TOGA: Temporally Grounded Open-Ended Video QA with Weak Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Ayush and Roy, Anirban and Chellappa, Rama and Bastian, Nathaniel D. and Velasquez, Alvaro and Jha, Susmit},
  title     = {{TOGA}: Temporally Grounded Open-Ended Video {QA} with Weak Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23593--23603},
}
Multi-Object Sketch Animation by Scene Decomposition and Motion Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jingyu and Xin, Zijie and Fu, Yuhan and Zhao, Ruixiang and Lan, Bangxiang and Li, Xirong}, title = {Multi-Object Sketch Animation by Scene Decomposition and Motion Planning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11537-11546} }
MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Rongchang and Du, Chen and Song, Ping and Liu, Chang}, title = {MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24135-24146} }
MistSense: Versatile Online Detection of Procedural and Execution Mistakes-
[pdf]
[bibtex]@InProceedings{Patsch_2025_ICCV, author = {Patsch, Constantin and Wu, Yuankai and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard}, title = {MistSense: Versatile Online Detection of Procedural and Execution Mistakes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14528-14537} }
FusionPhys: A Flexible Framework for Fusing Complementary Sensing Modalities in Remote Physiological Measurement-
[pdf]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Chenhang and Yang, Huiyu and Ge, Jieyi and Sun, Zhaodong and Cheng, Xu and Ren, Kui and Li, Xiaobai}, title = {FusionPhys: A Flexible Framework for Fusing Complementary Sensing Modalities in Remote Physiological Measurement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9363-9373} }
Task Vector Quantization for Memory-Efficient Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Youngeun and Lee, Seunghwan and Jung, Aecheon and Ryu, Bogon and Hong, Sungeun}, title = {Task Vector Quantization for Memory-Efficient Model Merging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20105-20115} }
Dirichlet-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Baoyou and Liu, Ce and Yuan, Weihao and Dong, Zilong and Zhu, Siyu}, title = {Dirichlet-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14507-14516} }
Dual-Expert Consistency Model for Efficient and High-Quality Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_ICCV, author = {Lv, Zhengyao and Si, Chenyang and Pan, Tianlin and Chen, Zhaoxi and Wong, Kwan-Yee K. and Qiao, Yu and Liu, Ziwei}, title = {Dual-Expert Consistency Model for Efficient and High-Quality Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14983-14993} }
GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer-
[pdf]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Xin and Su, Haisheng and Ma, Cong and Liu, Kai and Wu, Wei and Hui, Fei and Yan, Junchi}, title = {GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26879-26889} }
Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV, author = {Ge, Junhao and Liu, Zuhong and Fan, Longteng and Jiang, Yifan and Su, Jiaqi and Li, Yiming and Zhang, Zhejun and Chen, Siheng}, title = {Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28859-28869} }
Dense2MoE: Restructuring Diffusion Transformer to MoE for Efficient Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Youwei and Ren, Yuxi and Xia, Xin and Xiao, Xuefeng and Xie, Xiaohua}, title = {Dense2MoE: Restructuring Diffusion Transformer to MoE for Efficient Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18661-18670} }
KinMo: Kinematic-aware Human Motion Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Pengfei and Liu, Pinxin and Garrido, Pablo and Kim, Hyeongwoo and Chaudhuri, Bindita}, title = {KinMo: Kinematic-aware Human Motion Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11187-11197} }
NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ying-Tian and Li, Jiajun and Liu, Yu-Tao and Yu, Xin and Guo, Yuan-Chen and Cao, Yan-Pei and Liang, Ding and Shamir, Ariel and Zhang, Song-Hai}, title = {NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28000-28009} }
Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yudong and Sun, Jingwei and Lin, Yueqian and Zhang, Jianyi and Zhang, Jingyang and Yin, Ming and Wang, Qinsi and Li, Hai and Chen, Yiran}, title = {Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20802-20811} }
LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Chin-Yang and Sun, Cheng and Yang, Fu-En and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun}, title = {LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27412-27422} }
AVAM: a Universal Training-free Adaptive Visual Anchoring Embedded into Multimodal Large Language Model for Multi-image Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Kang and Zhong, Guojin and Cheng, Jintao and Yuan, Jin and Li, Zhiyong}, title = {AVAM: a Universal Training-free Adaptive Visual Anchoring Embedded into Multimodal Large Language Model for Multi-image Question Answering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2292-2302} }
WikiAutoGen: Towards Multi-Modal Wikipedia-Style Article Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhongyu and Chen, Jun and Xu, Dannong and Fei, Junjie and Shen, Xiaoqian and Zhao, Liangbing and Feng, Chun-Mei and Elhoseiny, Mohamed}, title = {WikiAutoGen: Towards Multi-Modal Wikipedia-Style Article Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15532-15541} }
GMMamba: Group Masking Mamba for Whole Slide Image Classification-
[pdf]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Tingting and Yao, Hongxun and Jiang, Kui and Xiao, Yi and Zhao, Sicheng}, title = {GMMamba: Group Masking Mamba for Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9935-9944} }
S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Guangting and Deng, Jiajun and Chu, Xiaomeng and Yuan, Yu and Li, Houqiang and Zhang, Yanyong}, title = {S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25594-25604} }
LIRA: Reasoning Reconstruction via Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zhen and Wang, Tong and Ma, Yunkai and Tan, Xiao and Jing, Fengshui}, title = {LIRA: Reasoning Reconstruction via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1762-1772} }
Generate, Transduct, Adapt: Iterative Transduction with VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saha_2025_ICCV, author = {Saha, Oindrila and Lawrence, Logan and Van Horn, Grant and Maji, Subhransu}, title = {Generate, Transduct, Adapt: Iterative Transduction with VLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1369-1379} }
Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Xiang and Kong, Lingdong and Wang, Song and Zhou, Chuanwei and Liu, Qingshan}, title = {Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25506-25518} }
Test-Time Retrieval-Augmented Adaptation for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Xinqi and Chen, Xueli and Yang, Luoxiao and Yap, Chuin Hong and Qureshi, Rizwan and Dou, Qi and Yap, Moi Hoon and Shah, Mubarak}, title = {Test-Time Retrieval-Augmented Adaptation for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8810-8819} }
Causal Disentanglement and Cross-Modal Alignment for Enhanced Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Tianjiao and Zhang, Zhen and Liu, Yuhang and Shi, Javen Qinfeng}, title = {Causal Disentanglement and Cross-Modal Alignment for Enhanced Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {890-900} }
Seeing and Seeing Through the Glass: Real and Synthetic Data for Multi-Layer Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Hongyu and Zuo, Yiming and Subramanian, Venkat and Chen, Patrick and Deng, Jia}, title = {Seeing and Seeing Through the Glass: Real and Synthetic Data for Multi-Layer Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6715-6725} }
GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Lin and Zhao, Boming and Hu, Jiarui and Shen, Xujie and Dang, Ziqiang and Bao, Hujun and Cui, Zhaopeng}, title = {GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25800-25809} }
NavMorph: A Self-Evolving World Model for Vision-and-Language Navigation in Continuous Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Xuan and Gao, Junyu and Xu, Changsheng}, title = {NavMorph: A Self-Evolving World Model for Vision-and-Language Navigation in Continuous Environments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5536-5546} }
SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2025_ICCV, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lella, Luigi and Ragaglia, Matteo and Oliva, Alessandro and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20944-20953} }
Distilling Diffusion Models to Efficient 3D LiDAR Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shengyuan and Zhao, An and Yang, Ling and Li, Zejian and Meng, Chenye and Xu, Haoran and Chen, Tianrun and Wei, AnYang and Gu, Perry Pengyun and Sun, Lingyun}, title = {Distilling Diffusion Models to Efficient 3D LiDAR Scene Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5007-5016} }
Self-supervised Learning of Hybrid Part-aware 3D Representations of 2D Gaussians and Superquadrics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Zhirui and Yi, Renjiao and Huang, Yuhang and Chen, Wei and Zhu, Chenyang and Xu, Kai}, title = {Self-supervised Learning of Hybrid Part-aware 3D Representations of 2D Gaussians and Superquadrics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9649-9659} }
Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2025_ICCV, author = {Jeon, Young Seok and Yang, Hongfei and Fu, Huazhu and Feng, Mengling}, title = {Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24024-24033} }
OCK: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Yeon-Ji and Kim, Jaein and Choi, Suhyung and Kim, Jin-Hwa and Zhang, Byoung-Tak}, title = {OCK: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11359-11368} }
Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2025_ICCV, author = {Moon, Saemi and Lee, Minjong and Park, Sangdon and Kim, Dongwoo}, title = {Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16356-16366} }
AutoComPose: Automatic Generation of Pose Transition Descriptions for Composed Pose Retrieval Using Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Yi-Ting and Eum, Sungmin and Lee, Doheon and Shete, Rohit and Wang, Chiao-Yi and Kwon, Heesung and Bhattacharyya, Shuvra S.}, title = {AutoComPose: Automatic Generation of Pose Transition Descriptions for Composed Pose Retrieval Using Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7409-7418} }
WorldScore: A Unified Evaluation Benchmark for World Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Haoyi and Yu, Hong-Xing and Chen, Sirui and Fei-Fei, Li and Wu, Jiajun}, title = {WorldScore: A Unified Evaluation Benchmark for World Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27713-27724} }
PhysTwin: Physics-Informed Reconstruction and Simulation of Deformable Objects from Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Hanxiao and Hsu, Hao-Yu and Zhang, Kaifeng and Yu, Hsin-Ni and Wang, Shenlong and Li, Yunzhu}, title = {PhysTwin: Physics-Informed Reconstruction and Simulation of Deformable Objects from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7219-7230} }
Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuansheng and Zou, Yunhao and Chen, Linwei and Fu, Ying}, title = {Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13815-13825} }
EvaGaussians: Event Stream Assisted Gaussian Splatting from Blurry Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Wangbo and Feng, Chaoran and Li, Jianing and Tang, Jiye and Yang, Jiashu and Tang, Zhenyu and Cao, Meng and Jia, Xu and Yang, Yuchao and Yuan, Li and Tian, Yonghong}, title = {EvaGaussians: Event Stream Assisted Gaussian Splatting from Blurry Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24780-24790} }
RainbowPrompt: Diversity-Enhanced Prompt-Evolving for Continual Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Kiseong and Kim, Gyeong-hyeon and Kim, Eunwoo}, title = {RainbowPrompt: Diversity-Enhanced Prompt-Evolving for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1130-1140} }
OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Gaojie and Jiang, Jianwen and Yang, Jiaqi and Zheng, Zerong and Liang, Chao and Zhang, Yuan and Liu, Jingtuo}, title = {OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13847-13858} }
"Principal Components" Enable A New Language of Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Xin and Zhao, Bingchen and Elezi, Ismail and Deng, Jiankang and Qi, Xiaojuan}, title = {''Principal Components'' Enable A New Language of Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16641-16651} }
Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Jeonghoon and Lee, Juyoung and Chung, Chaeyeon and Lee, Jaeseong and Choo, Jaegul and Gu, Jindong}, title = {Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17567-17576} }
GM-MoE: Low-Light Enhancement with Gated-Mechanism Mixture-of-Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Minwen and Dong, Haobo and Wang, Xinyi and Ubul, Kurban and Shao, Yihua and Yan, Ziyang}, title = {GM-MoE: Low-Light Enhancement with Gated-Mechanism Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8766-8776} }
Beyond Low-Rank Tuning: Model Prior-Guided Rank Allocation for Effective Transfer in Low-Data and Large-Gap Regimes.-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Chuyan and Wang, Kefan and Gu, Yun}, title = {Beyond Low-Rank Tuning: Model Prior-Guided Rank Allocation for Effective Transfer in Low-Data and Large-Gap Regimes.}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3337-3345} }
Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion-
[pdf]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Byeonghun and Cho, Hyunmin and Choi, Hong Gyu and Kang, Soo Min and Ahn, Iljun and Jin, Kyong Hwan}, title = {Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10764-10774} }
Backdoor Mitigation by Distance-Driven Detoxification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Shaokui and Liu, Jiayin and Zha, Hongyuan}, title = {Backdoor Mitigation by Distance-Driven Detoxification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4465-4474} }
FakeRadar: Probing Forgery Outliers to Detect Unknown Deepfake Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhaolun and Li, Jichang and Cai, Yinqi and Chen, Junye and Luo, Xiaonan and Li, Guanbin and Lan, Rushi}, title = {FakeRadar: Probing Forgery Outliers to Detect Unknown Deepfake Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13382-13392} }
CuMPerLay: Learning Cubical Multiparameter Persistence Vectorizations-
[pdf]
[supp]
[bibtex]@InProceedings{Korkmaz_2025_ICCV, author = {Korkmaz, Caner and Nuwagira, Brighton and Coskunuzer, Baris and Birdal, Tolga}, title = {CuMPerLay: Learning Cubical Multiparameter Persistence Vectorizations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27084-27094} }
Towards Privacy-preserved Pre-training of Remote Sensing Foundation Models with Federated Mutual-guidance Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Jieyi and Zhang, Chengwei and Dang, Bo and Li, Yansheng}, title = {Towards Privacy-preserved Pre-training of Remote Sensing Foundation Models with Federated Mutual-guidance Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1804-1814} }
SynTag: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Han and Chen, Kejiang and Ma, Zehua and Deng, Jiajun and Li, Yicong and Zhang, Weiming and Chang, Ee-Chien}, title = {SynTag: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15416-15425} }
Recovering Parametric Scenes from Very Few Time-of-Flight Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sifferman_2025_ICCV, author = {Sifferman, Carter and Li, Yiquan and Li, Yiming and Mu, Fangzhou and Gleicher, Michael and Gupta, Mohit and Li, Yin}, title = {Recovering Parametric Scenes from Very Few Time-of-Flight Pixels}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27989-27999} }
DIMCIM: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teotia_2025_ICCV, author = {Teotia, Revant and Ross, Candace and Ullrich, Karen and Chopra, Sumit and Romero-Soriano, Adriana and Hall, Melissa and Muckley, Matthew}, title = {DIMCIM: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16431-16440} }
SAUCE: Selective Concept Unlearning in Vision-Language Models with Sparse Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2025_ICCV, author = {Geng, Jiahui and Li, Qing}, title = {SAUCE: Selective Concept Unlearning in Vision-Language Models with Sparse Autoencoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3023-3033} }
ForCenNet: Foreground-Centric Network for Document Image Rectification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Peng and Li, Qiang and Yang, Kaicheng and Guo, Dong and Li, Jia and Zhou, Nan and An, Xiang and Yang, Ninghua and Deng, Jiankang}, title = {ForCenNet: Foreground-Centric Network for Document Image Rectification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15137-15146} }
Adversarial Data Augmentation for Single Domain Generalization via Lyapunov Exponent-Guided Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zuyu and Chen, Ning and Liu, Yongshan and Zhang, Qinghua and Zhang, Xu}, title = {Adversarial Data Augmentation for Single Domain Generalization via Lyapunov Exponent-Guided Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {552-561} }
Always Skip Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Yiping and Saratchandran, Hemanth and Moghadam, Peyman and Lucey, Simon}, title = {Always Skip Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23115-23123} }
ExCap3D: Expressive 3D Scene Understanding via Object Captioning with Varying Detail-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeshwanth_2025_ICCV, author = {Yeshwanth, Chandan and Rozenberszki, D\'avid and Dai, Angela}, title = {ExCap3D: Expressive 3D Scene Understanding via Object Captioning with Varying Detail}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21699-21709} }
CMAD: Correlation-Aware and Modalities-Aware Distillation for Multimodal Sentiment Analysis with Missing Modalities-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuang_2025_ICCV, author = {Zhuang, Yan and Liu, Minhao and Bai, Wei and Zhang, Yanru and Zhang, Xiaoyue and Deng, Jiawen and Ren, Fuji}, title = {CMAD: Correlation-Aware and Modalities-Aware Distillation for Multimodal Sentiment Analysis with Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4626-4636} }
Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent-
[pdf]
[supp]
[bibtex]@InProceedings{Ci_2025_ICCV, author = {Ci, En and Guan, Shanyan and Ge, Yanhao and Zhang, Yilin and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying}, title = {Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19185-19194} }
LiT: Delving into a Simple Linear Diffusion Transformer for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiahao and Kang, Ning and Yao, Lewei and Chen, Mengzhao and Wu, Chengyue and Zhang, Songyang and Xue, Shuchen and Liu, Yong and Wu, Taiqiang and Liu, Xihui and Zhang, Kaipeng and Zhang, Shifeng and Shao, Wenqi and Li, Zhenguo and Luo, Ping}, title = {LiT: Delving into a Simple Linear Diffusion Transformer for Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16068-16078} }
LeGrad: An Explainability Method for Vision Transformers via Feature Formation Sensitivity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bousselham_2025_ICCV, author = {Bousselham, Walid and Boggust, Angie and Chaybouti, Sofian and Strobelt, Hendrik and Kuehne, Hilde}, title = {LeGrad: An Explainability Method for Vision Transformers via Feature Formation Sensitivity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20336-20345} }
GaussianFlowOcc: Sparse and Weakly Supervised Occupancy Estimation using Gaussian Splatting and Temporal Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boeder_2025_ICCV, author = {Boeder, Simon and Gigengack, Fabian and Risse, Benjamin}, title = {GaussianFlowOcc: Sparse and Weakly Supervised Occupancy Estimation using Gaussian Splatting and Temporal Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24943-24954} }
DreamLayer: Simultaneous Multi-Layer Generation via Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Junjia and Yan, Pengxiang and Cai, Jinhang and Liu, Jiyang and Wang, Zhao and Wang, Yitong and Wu, Xinglong and Li, Guanbin}, title = {DreamLayer: Simultaneous Multi-Layer Generation via Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3357-3366} }
OD-RASE: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Shimomura_2025_ICCV, author = {Shimomura, Kota and Nambata, Masaki and Ishikawa, Atsuya and Mimura, Ryota and Inoue, Koki and Yamashita, Takayoshi and Kawabuchi, Takayuki}, title = {OD-RASE: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26167-26177} }
FREE-Merging: Fourier Transform for Efficient Model Merging-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Shenghe and Wang, Hongzhi}, title = {FREE-Merging: Fourier Transform for Efficient Model Merging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3863-3873} }
ForgeLens: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yingjian and Zhang, Lei and Niu, Yakun}, title = {ForgeLens: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16270-16280} }
Serialization based Point Cloud Oversegmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chenghui and Kwan, Jianlong and Li, Dilong and Chen, Ziyi and Guan, Haiyan},
  title     = {Serialization based Point Cloud Oversegmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25831--25840}
}
Latent Expression Generation for Referring Image Segmentation and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Seonghoon and Hong, Joonbeom and Lee, Joonseok and Son, Jeany},
  title     = {Latent Expression Generation for Referring Image Segmentation and Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21374--21383}
}
NoiseController: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Haotian and Wang, Xin and Lin, Di and Wu, Yipeng and Chen, Qin and Liu, Ruonan and Yang, Kairui and Li, Ping and Guo, Qing},
  title     = {{NoiseController}: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14443--14452}
}
Rethinking Cross-Modal Interaction in Multimodal Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_ICCV,
  author    = {Lv, Zhengyao and Pan, Tianlin and Si, Chenyang and Chen, Zhaoxi and Zuo, Wangmeng and Liu, Ziwei and Wong, Kwan-Yee K.},
  title     = {Rethinking Cross-Modal Interaction in Multimodal Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5934--5943}
}
MOERL: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Xia, Peiwen and Li, Bo and Jiang, Peng-Tao and Kong, Zhe and Zhang, Kaihao and Lu, Tong and Luo, Wenhan},
  title     = {{MOERL}: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13673--13683}
}
DACoN: DINO for Anime Paint Bucket Colorization with Any Number of Reference Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagata_2025_ICCV,
  author    = {Nagata, Kazuma and Kaneko, Naoshi},
  title     = {{DACoN}: {DINO} for Anime Paint Bucket Colorization with Any Number of Reference Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17899--17908}
}
Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Ji and Wang, Xin and Hao, Fangwei and Yu, Mingyang and Chen, Chunyuan and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22131--22142}
}
Multi-identity Human Image Animation with Structural Video Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhenzhi and Li, Yixuan and Zeng, Yanhong and Guo, Yuwei and Lin, Dahua and Xue, Tianfan and Dai, Bo},
  title     = {Multi-identity Human Image Animation with Structural Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11937--11947}
}
UniVG: A Generalist Diffusion Model for Unified Image Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tsu-Jui and Qian, Yusu and Chen, Chen and Hu, Wenze and Gan, Zhe and Yang, Yinfei},
  title     = {{UniVG}: A Generalist Diffusion Model for Unified Image Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17160--17170}
}
CLOT: Closed Loop Optimal Transport for Unsupervised Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bueno-Benito_2025_ICCV,
  author    = {Bueno-Benito, Elena and Dimiccoli, Mariella},
  title     = {{CLOT}: Closed Loop Optimal Transport for Unsupervised Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10719--10729}
}
GeoSplatting: Towards Geometry Guided Gaussian Splatting for Physically-based Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Kai and Gao, Chong and Li, Guanbin and Chen, Wenzheng and Chen, Baoquan},
  title     = {{GeoSplatting}: Towards Geometry Guided {Gaussian} Splatting for Physically-based Inverse Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28991--29000}
}
Temporal-aware Query Routing for Real-time Video Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zesen and Li, Kehan and Zhao, Yian and Zhang, Hang and Liu, Chang and Chen, Jie},
  title     = {Temporal-aware Query Routing for Real-time Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22467--22476}
}
Accelerating Diffusion Sampling via Exploiting Local Transition Coherence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Shangwen and Zhang, Han and Yang, Zhantao and Peng, Qianyu and Pu, Zhao and Wang, Huangji and Cheng, Fan},
  title     = {Accelerating Diffusion Sampling via Exploiting Local Transition Coherence},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18284--18293}
}
What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baraldi_2025_ICCV,
  author    = {Baraldi, Lorenzo and Bucciarelli, Davide and Betti, Federico and Cornia, Marcella and Baraldi, Lorenzo and Sebe, Nicu and Cucchiara, Rita},
  title     = {What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16217--16226}
}
Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Seongmin and Kim, Hyungmin and Kim, Sangwoo and Jeon, Wonseok and Yang, Juyoung and Jeon, Byeongwook and Oh, Yoonseon and Choi, Jungwook},
  title     = {Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13140--13150}
}
Where am I? Cross-View Geo-localization with Natural Language Descriptions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and Lin, Honglin and Ou, Leyan and Chen, Dairong and Wang, Zihao and Zhu, Qi and He, Conghui and Li, Weijia},
  title     = {Where am {I}? Cross-View Geo-localization with Natural Language Descriptions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5890--5900}
}
Boost 3D Reconstruction using Diffusion-based Monocular Camera Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Junyuan and Yin, Wei and Guo, Xiaoyang and Zhang, Qian and Hu, Xiaotao and Ren, Weiqiang and Long, Xiao-Xiao and Tan, Ping},
  title     = {Boost {3D} Reconstruction using Diffusion-based Monocular Camera Calibration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7110--7121}
}
GTR: Guided Thought Reinforcement Prevents Thought Collapse in RL-based VLM Agent Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Tong and Yang, Yijun and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng},
  title     = {{GTR}: Guided Thought Reinforcement Prevents Thought Collapse in {RL}-based {VLM} Agent Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18855--18865}
}
Multi-view Gaze Target Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Qiaomu and Golani, Vivek Raju and Xu, Jingyi and Dutta, Progga Paromita and Hoai, Minh and Samaras, Dimitris},
  title     = {Multi-view Gaze Target Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5371--5381}
}
Semantic Discrepancy-aware Detector for Image Forgery Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziye and Yu, Minghang and Xu, Chunyan and Cui, Zhen},
  title     = {Semantic Discrepancy-aware Detector for Image Forgery Identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18388--18398}
}
CapeLLM: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Chung, Hyungjin and Kim, Byung-Hoon},
  title     = {{CapeLLM}: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22889--22898}
}
VisHall3D: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Haoang and Su, Yuanqi and Zhang, Xiaoning and Gao, Longjun and Xue, Yu and Wang, Le},
  title     = {{VisHall3D}: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28674--28684}
}
Controllable 3D Outdoor Scene Generation via Scene Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuheng and Li, Xinke and Zhang, Yuning and Qi, Lu and Li, Xin and Wang, Wenping and Li, Chongshou and Li, Xueting and Yang, Ming-Hsuan},
  title     = {Controllable {3D} Outdoor Scene Generation via Scene Graphs},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28052--28062}
}
JPEG Processing Neural Operator for Backward-Compatible Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV,
  author    = {Han, Woo Kyoung and Lee, Yongjun and Lee, Byeonghun and Park, Sang Hyun and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {{JPEG} Processing Neural Operator for Backward-Compatible Coding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19503--19512}
}
DPoser-X: Diffusion Model as Robust 3D Whole-body Human Pose Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Junzhe and Lin, Jing and Dou, Hongkun and Zeng, Ailing and Deng, Yue and Liu, Xian and Cai, Zhongang and Yang, Lei and Zhang, Yulun and Wang, Haoqian and Liu, Ziwei},
  title     = {{DPoser-X}: Diffusion Model as Robust {3D} Whole-body Human Pose Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9988--9997}
}
Learning 4D Embodied World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhen_2025_ICCV,
  author    = {Zhen, Haoyu and Sun, Qiao and Zhang, Hongxin and Li, Junyan and Zhou, Siyuan and Du, Yilun and Gan, Chuang},
  title     = {Learning {4D} Embodied World Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5337--5347}
}
FlowTok: Flowing Seamlessly Across Text and Image Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Ju and Yu, Qihang and Liu, Qihao and Chen, Liang-Chieh},
  title     = {{FlowTok}: Flowing Seamlessly Across Text and Image Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16629--16640}
}
PROGRESSOR: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ayalew_2025_ICCV,
  author    = {Ayalew, Tewodros W. and Zhang, Xiao and Wu, Kevin Yuanbo and Jiang, Tianchong and Maire, Michael and Walter, Matthew R.},
  title     = {{PROGRESSOR}: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10297--10306}
}
UniOcc: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuping and Huang, Xiangyu and Sun, Xiaokang and Yan, Mingxuan and Xing, Shuo and Tu, Zhengzhong and Li, Jiachen},
  title     = {{UniOcc}: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25560--25570}
}
AdaHuman: Animatable Detailed 3D Human Generation with Compositional Multiview Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yangyi and Yuan, Ye and Li, Xueting and Kautz, Jan and Iqbal, Umar},
  title     = {{AdaHuman}: Animatable Detailed {3D} Human Generation with Compositional Multiview Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13533--13543}
}
ReCamMaster: Camera-Controlled Generative Rendering from A Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Jianhong and Xia, Menghan and Fu, Xiao and Wang, Xintao and Mu, Lianrui and Cao, Jinwen and Liu, Zuozhu and Hu, Haoji and Bai, Xiang and Wan, Pengfei and Zhang, Di},
  title     = {{ReCamMaster}: Camera-Controlled Generative Rendering from A Single Video},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14834--14844}
}
MaskHand: Generative Masked Modeling for Robust Hand Mesh Reconstruction in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saleem_2025_ICCV,
  author    = {Saleem, Muhammad Usama and Pinyoanuntapong, Ekkasit and Patel, Mayur Jagdishbhai and Xue, Hongfei and Helmy, Ahmed and Das, Srijan and Wang, Pu},
  title     = {{MaskHand}: Generative Masked Modeling for Robust Hand Mesh Reconstruction in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8372--8383}
}
Image-Guided Shape-from-Template Using Mesh Inextensibility Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Thuy and Chen, Ruochen and Parashar, Shaifali},
  title     = {Image-Guided Shape-from-Template Using Mesh Inextensibility Constraints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7419--7428}
}
Confound from All Sides, Distill with Resilience: Multi-Objective Adversarial Paths to Zero-Shot Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Junhao and Liu, Jiao and Qu, Xinghua and Ong, Yew-Soon},
  title     = {Confound from All Sides, Distill with Resilience: Multi-Objective Adversarial Paths to Zero-Shot Robustness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {624--634}
}
NGD: Neural Gradient Based Deformation for Monocular Garment Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dasgupta_2025_ICCV,
  author    = {Dasgupta, Soham and Naik, Shanthika and Savalia, Preet and Ingle, Sujay Kumar and Sharma, Avinash},
  title     = {{NGD}: Neural Gradient Based Deformation for Monocular Garment Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25485--25495}
}
Learned Image Compression with Hierarchical Progressive Context Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuqi and Zhang, Haotian and Li, Li and Liu, Dong},
  title     = {Learned Image Compression with Hierarchical Progressive Context Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18834--18843}
}
U-ViLAR: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaofan and Xu, Zhihao and Wu, Chenming and Yang, Zhao and Zhang, Yumeng and Liu, Jiang-Jiang and Yu, Haibao and Ye, Xiaoqing and Wang, Yuan and Li, Shirui and Sun, Xun and Wan, Ji and Wang, Jun},
  title     = {{U-ViLAR}: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24889--24898}
}
CF3: Compact and Fast 3D Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyunjoon and Min, Joonkyu and Park, Jaesik},
  title     = {{CF3}: Compact and Fast {3D} Feature Fields},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27906--27916}
}
Joint Self-Supervised Video Alignment and Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ali_2025_ICCV,
  author    = {Ali, Ali Shah and Mahmood, Syed Ahmed and Saeed, Mubin and Konin, Andrey and Zia, M. Zeeshan and Tran, Quoc-Huy},
  title     = {Joint Self-Supervised Video Alignment and Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10807--10818}
}
2.5 Years in Class: A Multimodal Textbook for Vision-Language Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenqi and Zhang, Hang and Li, Xin and Sun, Jiashuo and Shen, Yongliang and Lu, Weiming and Zhao, Deli and Zhuang, Yueting and Bing, Lidong},
  title     = {2.5 Years in Class: A Multimodal Textbook for Vision-Language Pretraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4647--4658}
}
Learning 3D Scene Analogies with Neural Contextual Scene Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Bae, Gwangtak and Lee, Eun Sun and Kim, Young Min},
  title     = {Learning {3D} Scene Analogies with Neural Contextual Scene Maps},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7828--7840}
}
Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yikang and Zhang, Tao and Xu, Shilin and Chen, Shihao and Zhou, Qianyu and Tong, Yunhai and Ji, Shunping and Zhang, Jiangning and Qi, Lu and Li, Xiangtai},
  title     = {Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17663--17674}
}
FlowSeek: Optical Flow Made Easier with Depth Foundation Models and Motion Bases-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poggi_2025_ICCV,
  author    = {Poggi, Matteo and Tosi, Fabio},
  title     = {{FlowSeek}: Optical Flow Made Easier with Depth Foundation Models and Motion Bases},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5667--5679}
}
EventUPS: Uncalibrated Photometric Stereo Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Jinxiu and Yu, Bohan and Yang, Siqi and Zhuang, Haotian and Ren, Jieji and Duan, Peiqi and Shi, Boxin},
  title     = {{EventUPS}: Uncalibrated Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7516--7525}
}
WildSAT: Learning Satellite Image Representations from Wildlife Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Daroya_2025_ICCV,
  author    = {Daroya, Rangel and Cole, Elijah and Mac Aodha, Oisin and Van Horn, Grant and Maji, Subhransu},
  title     = {{WildSAT}: Learning Satellite Image Representations from Wildlife Observations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6143--6154}
}
Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargent_2025_ICCV,
  author    = {Sargent, Kyle and Hsu, Kyle and Johnson, Justin and Fei-Fei, Li and Wu, Jiajun},
  title     = {Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19471--19481}
}
UniPhys: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yan and Karunratanakul, Korrawe and Luo, Zhengyi and Tang, Siyu},
  title     = {{UniPhys}: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13214--13224}
}
SPADE: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning-
[pdf]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xin and Qin, Ke and Duan, Guiduo and Li, Ming and Li, Yuan-Fang and He, Tao},
  title     = {{SPADE}: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15562--15572}
}
Few-Shot Pattern Detection via Template Matching and Regression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Eunchan and Kang, Dahyun and Kim, Sanghyun and Choi, Yunseon and Cho, Minsu},
  title     = {Few-Shot Pattern Detection via Template Matching and Regression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21578--21588}
}
Humans as Checkerboards: Calibrating Camera Motion Scale for World-Coordinate Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Fengyuan and Gu, Kerui and Nguyen, Ha Linh and Tse, Tze Ho Elden and Yao, Angela},
  title     = {Humans as Checkerboards: Calibrating Camera Motion Scale for World-Coordinate Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6069--6079}
}
Leveraging BEV Paradigm for Ground-to-Aerial Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and He, Jun and Li, Weijia and Lv, Zhutao and Lin, Yi and Yu, Jinhua and Yang, Haote and He, Conghui},
  title     = {Leveraging {BEV} Paradigm for Ground-to-Aerial Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28451--28461}
}
Deciphering Cross-Modal Alignment in Large Vision-Language Models via Modality Integration Rate-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Qidong and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Wang, Jiaqi and Zhang, Weiming and Yu, Nenghai},
  title     = {Deciphering Cross-Modal Alignment in Large Vision-Language Models via Modality Integration Rate},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {218--227}
}
FPEM: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Hui and Ren, Xiaoyu and Yu, Hongjiu and Chen, Ying and Li, Kai and Wang, L and Min, Xiongkuo and Duan, Huiyu and Zhai, Guangtao and Liu, Xu},
  title     = {{FPEM}: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11458--11468}
}
MultiverSeg: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Guttag, John and Dalca, Adrian V.},
  title     = {{MultiverSeg}: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20966--20980}
}
Is Meta-Learning Out? Rethinking Unsupervised Few-Shot Classification with Limited Entropy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yunchuan and Liu, Yu and Zhou, Ke and Shen, Zhiqi and Hwang, Jenq-Neng and Belongie, Serge and Li, Lei},
  title     = {Is Meta-Learning Out? Rethinking Unsupervised Few-Shot Classification with Limited Entropy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4188--4197}
}
Synergistic Prompting for Robust Visual Recognition with Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhihui and Dai, Luanyuan and Lin, Qika and Diao, Yunfeng and Jin, Guangyin and Guo, Yufei and Zhang, Jing and Hao, Xiaoshuai},
  title     = {Synergistic Prompting for Robust Visual Recognition with Missing Modalities},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1881--1890}
}
DAMap: Distance-aware MapNet for High Quality HD Map Construction-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jinpeng and Li, Chen and Lin, Yutong and Fu, Jingwen and Zhou, Sanping and Zheng, Nanning},
  title     = {{DAMap}: Distance-aware {MapNet} for High Quality {HD} Map Construction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5285--5294}
}
Magic Insert: Style-Aware Drag-and-Drop-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruiz_2025_ICCV,
  author    = {Ruiz, Nataniel and Li, Yuanzhen and Wadhwa, Neal and Pritch, Yael and Rubinstein, Michael and Jacobs, David E. and Fruchter, Shlomi},
  title     = {{Magic Insert}: Style-Aware Drag-and-Drop},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15971--15981}
}
ProGait: A Multi-Purpose Video Dataset and Benchmark for Transfemoral Prosthesis Users-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Xiangyu and Yang, Boyuan and Liu, Weichen and Xue, Qiyao and Alamri, Abrar and Fiedler, Goeran and Gao, Wei},
  title     = {{ProGait}: A Multi-Purpose Video Dataset and Benchmark for Transfemoral Prosthesis Users},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8984--8993}
}
MuGS: Multi-Baseline Generalizable Gaussian Splatting Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Yaopeng and Shen, Liao and Liu, Tianqi and Li, Jiaqi and Huang, Zihao and Sun, Huiqiang and Cao, Zhiguo},
  title     = {{MuGS}: Multi-Baseline Generalizable {Gaussian} Splatting Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25583--25593}
}
HADES: Human Avatar with Dynamic Explicit Hair Strands-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zhanfeng and Tu, Hanzhang and Peng, Cheng and Zhang, Hongwen and Zhou, Boyao and Liu, Yebin},
  title     = {{HADES}: Human Avatar with Dynamic Explicit Hair Strands},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12318--12327}
}
Aligning Effective Tokens with Video Anomaly in Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingxian and Liu, Jiahui and Fan, Ruidi and Li, Yanwei and Chang, Chirui and Zhao, Shizhen and Fok, Wilton W. T. and Qi, Xiaojuan and Wu, Yik-Chung},
  title     = {Aligning Effective Tokens with Video Anomaly in Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22695--22706}
}
PseudoMapTrainer: Learning Online Mapping without HD Maps-
[pdf]
[supp]
[bibtex]@InProceedings{Lowens_2025_ICCV,
  author    = {L{\"o}wens, Christian and Funke, Thorben and Xie, Jingchao and Condurache, Alexandru Paul},
  title     = {{PseudoMapTrainer}: Learning Online Mapping without {HD} Maps},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5263--5272}
}
From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Le and Zhao, Liangbing and Paul, Sayak and Liao, Yue and Zhang, Renrui and Xin, Yi and Gao, Peng and Elhoseiny, Mohamed and Li, Hongsheng},
  title     = {From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15329--15339}
}
FreeDance: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yiwen and Wang, Yang and Wen, Liting and Zhang, Hengyuan and Qi, Xingqun},
  title     = {{FreeDance}: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10560--10569}
}
Beyond the Frame: Generating 360° Panoramic Videos from Perspective Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Rundong and Wallingford, Matthew and Farhadi, Ali and Snavely, Noah and Ma, Wei-Chiu}, title = {Beyond the Frame: Generating 360{$^\circ$} Panoramic Videos from Perspective Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14336-14345} }
Multi-turn Consistent Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zijun and Deng, Yingying and He, Xiangyu and Dong, Weiming and Tang, Fan}, title = {Multi-turn Consistent Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15792-15801} }
HUMOTO: A 4D Dataset of Mocap Human Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jiaxin and Huang, Chun-Hao Paul and Bhattacharya, Uttaran and Huang, Qixing and Zhou, Yi}, title = {HUMOTO: A 4D Dataset of Mocap Human Object Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10886-10897} }
Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ziwei and Ramasinghe, Sameera and Xu, Chenchen and Monteil, Julien and Bazzani, Loris and Ajanthan, Thalaiyasingam}, title = {Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9924-9934} }
AutoPrompt: Automated Red-Teaming of Text-to-Image Models via LLM-Driven Adversarial Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yufan and Zhang, Wanqian and Chen, Huashan and Wang, Lin and Jia, Xiaojun and Lin, Zheng and Wang, Weiping}, title = {AutoPrompt: Automated Red-Teaming of Text-to-Image Models via LLM-Driven Adversarial Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17557-17566} }
LLM-assisted Entropy-based Adaptive Distillation for Unsupervised Fine-grained Visual Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Jianfeng and Luo, Danfeng and Liu, Daizong and Sun, Jie and Qu, Xiaoye and Yang, Xun and Liu, Dongsheng and Wang, Xun}, title = {LLM-assisted Entropy-based Adaptive Distillation for Unsupervised Fine-grained Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {383-392} }
Jailbreaking Multimodal Large Language Models via Shuffle Inconsistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Shiji and Duan, Ranjie and Wang, Fengxiang and Chen, Chi and Kang, Caixin and Ruan, Shouwei and Tao, Jialing and Chen, YueFeng and Xue, Hui and Wei, Xingxing}, title = {Jailbreaking Multimodal Large Language Models via Shuffle Inconsistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2045-2054} }
UMDATrack: Unified Multi-Domain Adaptive Tracking Under Adverse Weather Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Siyuan and Zhu, Rui and Wang, Ziqi and Ren, Wenqi and Yan, Yanyang and Cao, Xiaochun}, title = {UMDATrack: Unified Multi-Domain Adaptive Tracking Under Adverse Weather Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6466-6475} }
Boosting MLLM Reasoning with Text-Debiased Hint-GRPO-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Qihan and Dai, Weilong and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Chen, Jingyuan and Yao, Chang and Song, Jie}, title = {Boosting MLLM Reasoning with Text-Debiased Hint-GRPO}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4848-4857} }
Learning on the Go: A Meta-learning Object Navigation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Xiaorong and Song, Xinhang and Zhang, Sixian and Yu, Xinyao and Zhang, Xinmiao and Jiang, Shuqiang}, title = {Learning on the Go: A Meta-learning Object Navigation Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8939-8949} }
Visual Interestingness Decoded: How GPT-4o Mirrors Human Interests-
[pdf]
[supp]
[bibtex]@InProceedings{Abdullahu_2025_ICCV, author = {Abdullahu, Fitim and Grabner, Helmut}, title = {Visual Interestingness Decoded: How GPT-4o Mirrors Human Interests}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15350-15364} }
PriOr-Flow: Enhancing Primitive Panoramic Optical Flow with Orthogonal View-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Longliang and Feng, Miaojie and Cheng, Junda and Xiang, Jijun and Zhu, Xuan and Yang, Xin}, title = {PriOr-Flow: Enhancing Primitive Panoramic Optical Flow with Orthogonal View}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5326-5336} }
A Simple yet Mighty Hartley Diffusion Versatilist for Generalizable Dense Vision Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Bi_2025_ICCV, author = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Ji, Wei and Huang, Yawen and Li, Yuexiang and Zheng, Yefeng}, title = {A Simple yet Mighty Hartley Diffusion Versatilist for Generalizable Dense Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6748-6760} }
Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yufei and Chen, Hao and Deng, Yongjian and You, Wei}, title = {Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14732-14742} }
Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xiao and Chen, Qi and Peng, Xiulian and Yu, Kai and Chen, Xie and Lu, Yan}, title = {Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12904-12914} }
LGA-Net: Learning Local and Global Affinities for Sparse Scribble based Image Colorization-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2025_ICCV, author = {Lyu, Hongjin and Li, Bo and Rosin, Paul L. and Lai, Yu-Kun}, title = {LGA-Net: Learning Local and Global Affinities for Sparse Scribble based Image Colorization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8144-8153} }
Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2025_ICCV, author = {Hwang, Inseung and Choi, Kiseok and Ha, Hyunho and Kim, Min H.}, title = {Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24899-24909} }
JointDiT: Enhancing RGB-Depth Joint Modeling with Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Byung-Ki_2025_ICCV, author = {Byung-Ki, Kwon and Dai, Qi and Hyoseok, Lee and Luo, Chong and Oh, Tae-Hyun}, title = {JointDiT: Enhancing RGB-Depth Joint Modeling with Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25261-25271} }
Discretized Gaussian Representation for Tomographic Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Shaokai and Lu, Yuxiang and Guo, Yapan and Ji, Wei and Huang, Suizhi and Yang, Fengyu and Sirejiding, Shalayiding and He, Qichen and Tong, Jing and Ji, Yanbiao and Ding, Yue and Lu, Hongtao}, title = {Discretized Gaussian Representation for Tomographic Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25073-25082} }
ScoreHOI: Physically Plausible Reconstruction of Human-Object Interaction via Score-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ao and Liu, Jinpeng and Zhu, Yixuan and Tang, Yansong}, title = {ScoreHOI: Physically Plausible Reconstruction of Human-Object Interaction via Score-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7592-7602} }
CMB-ML: A Cosmic Microwave Background Dataset for the Oldest Possible Computer Vision Task-
[pdf]
[supp]
[bibtex]@InProceedings{Amato_2025_ICCV, author = {Amato, James and Xie, Yunan and Medina-Varela, Leonel and Aljerwi, Ammar and McCutcheon, Adam and Rippentrop, T. Seth and Gonzalez, Kristian and Delabrouille, Jacques and Ishak, Mustapha and Ruozzi, Nicholas}, title = {CMB-ML: A Cosmic Microwave Background Dataset for the Oldest Possible Computer Vision Task}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9418-9430} }
Deeply Supervised Flow-Based Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Inkyu and Yang, Chenglin and Chen, Liang-Chieh}, title = {Deeply Supervised Flow-Based Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16535-16544} }
Towards Efficient General Feature Prediction in Masked Skeleton Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Shengkai and Zhang, Zefan and Dong, Jianfeng and Cheng, Zhiyong and Chang, Xiaojun and Wang, Meng}, title = {Towards Efficient General Feature Prediction in Masked Skeleton Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12212-12221} }
Dynamic Point Maps: A Versatile Representation for Dynamic 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sucar_2025_ICCV, author = {Sucar, Edgar and Lai, Zihang and Insafutdinov, Eldar and Vedaldi, Andrea}, title = {Dynamic Point Maps: A Versatile Representation for Dynamic 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7295-7305} }
Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Guangyao and Zhuang, Siping and Jian, Yajun and Yan, Yan and Wang, Hanzi}, title = {Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23626-23635} }
Timestep-Aware Diffusion Model for Extreme Image Rescaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ce and Hu, Zhenyu and Sun, Wanjie and Chen, Zhenzhong}, title = {Timestep-Aware Diffusion Model for Extreme Image Rescaling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15594-15603} }
COVTrack: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion-
[pdf]
[bibtex]@InProceedings{Qian_2025_ICCV, author = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Hou, Junhui and Feng, Wei}, title = {COVTrack: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10054-10063} }
Toward Material-Agnostic System Identification from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Yizhou and Chen, Haoyu and Liu, Chunjiang and Li, Zhenyang and Herrmann, Charles and Hur, Junhwa and Li, Yinxiao and Yang, Ming-Hsuan and Raj, Bhiksha and Xu, Min}, title = {Toward Material-Agnostic System Identification from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5944-5956} }
Referring Expression Comprehension for Small Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goto_2025_ICCV, author = {Goto, Kanoko and Hirose, Takumi and Ukai, Mahiro and Kurita, Shuhei and Inoue, Nakamasa}, title = {Referring Expression Comprehension for Small Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21231-21242} }
Robust 3D-Masked Part-level Editing in 3D Gaussian Splatting with Regularized Score Distillation Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Hayeon and Jang, Ji Ha and Chun, Se Young}, title = {Robust 3D-Masked Part-level Editing in 3D Gaussian Splatting with Regularized Score Distillation Sampling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5501-5510} }
InstaDrive: Instance-Aware Driving World Models for Realistic and Consistent Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhuoran and Guo, Xi and Ding, Chenjing and Wang, Chiyu and Wu, Wei and Zhang, Yanyong}, title = {InstaDrive: Instance-Aware Driving World Models for Realistic and Consistent Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25410-25420} }
Effective Training Data Synthesis for Improving MLLM Chart Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yuwei and Zhang, Zeyu and Hou, Yunzhong and Li, Zhuowan and Liu, Gaowen and Payani, Ali and Ting, Yuan-Sen and Zheng, Liang}, title = {Effective Training Data Synthesis for Improving MLLM Chart Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2653-2663} }
DIVE: Taming DINO for Subject-Driven Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yi and Xiong, Wei and Zhang, He and Chen, Chaoqi and Liu, Jianzhuang and Yan, Mingfu and Chen, Shifeng}, title = {DIVE: Taming DINO for Subject-Driven Video Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16004-16014} }
Neural Architecture Search Driven by Locally Guided Diffusion for Personalized Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Peng and Wang, Xilu and Jin, Yaochu and Du, Wenli and Hu, Han}, title = {Neural Architecture Search Driven by Locally Guided Diffusion for Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4222-4231} }
Global-Aware Monocular Semantic Scene Completion with State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shijie and Cheng, Zhongyao and Li, Rong and Li, Shuai and Gall, Juergen and Xu, Xun and Yang, Xulei}, title = {Global-Aware Monocular Semantic Scene Completion with State Space Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25550-25559} }
ULTHO: Ultra-Lightweight yet Efficient Hyperparameter Optimization in Deep Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Mingqi and Li, Bo and Jin, Xin and Zeng, Wenjun}, title = {ULTHO: Ultra-Lightweight yet Efficient Hyperparameter Optimization in Deep Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2620-2630} }
GReg: Geometry-Aware Region Refinement for Sign Language Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Tongkai and Hu, Lianyu and Shang, Fanhua and Gao, Liqing and Feng, Wei}, title = {GReg: Geometry-Aware Region Refinement for Sign Language Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16472-16481} }
CarGait: Cross-Attention based Re-ranking for Gait recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Habib_2025_ICCV, author = {Habib, Gavriel and Barzilay, Noa and Shimshi, Or and Ben-Ari, Rami and Darshan, Nir}, title = {CarGait: Cross-Attention based Re-ranking for Gait recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11884-11894} }
Text2VDM: Text to Vector Displacement Maps for Expressive and Interactive 3D Sculpting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2025_ICCV, author = {Meng, Hengyu and Wang, Duotun and Shao, Zhijing and Liu, Ligang and Wang, Zeyu}, title = {Text2VDM: Text to Vector Displacement Maps for Expressive and Interactive 3D Sculpting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16882-16892} }
DiffVSR: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xiaohui and Liu, Yihao and Cao, Shuo and Chen, Ziyan and Zhuang, Shaobin and Chen, Xiangyu and He, Yinan and Wang, Yi and Qiao, Yu}, title = {DiffVSR: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15319-15328} }
3D Gaussian Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_ICCV, author = {Lou, Tianrui and Jia, Xiaojun and Liang, Siyuan and Liang, Jiawei and Zhang, Ming and Xiao, Yanjun and Cao, Xiaochun}, title = {3D Gaussian Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28752-28762} }
GenieBlue: Integrating both Linguistic and Multimodal Capabilities for Large Language Models on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Xudong and Chen, Yinghao and Wu, Renshou and Gao, Haohao and Chen, Xi and Yang, Xue and Zhao, Xiangyu and Zhou, Aojun and Li, Fangyuan and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng}, title = {GenieBlue: Integrating both Linguistic and Multimodal Capabilities for Large Language Models on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4198-4210} }
MambaML: Exploring State Space Models for Multi-Label Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Xuelin and Liu, Jian and Cao, Jiuxin and Wang, Bing}, title = {MambaML: Exploring State Space Models for Multi-Label Image Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4743-4753} }
SuperMat: Physically Consistent PBR Material Estimation at Interactive Rates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Yijia and Guo, Yuan-Chen and Yi, Ran and Chen, Yulong and Cao, Yan-Pei and Ma, Lizhuang}, title = {SuperMat: Physically Consistent PBR Material Estimation at Interactive Rates}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25083-25093} }
Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hebaixu and Ma, Jiayi}, title = {Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10730-10740} }
HiMTok: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Tao and Cheng, Changxu and Wang, Lingfeng and Chen, Senda and Zhao, Wuyue}, title = {HiMTok: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23267-23278} }
MixA-Q: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Weitian and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash}, title = {MixA-Q: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22143-22152} }
A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Jie and Su, Yiyang and Kim, Minchul and Jain, Anil and Liu, Xiaoming}, title = {A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13076-13086} }
EMD: Explicit Motion Modeling for High-Quality Street Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Xiaobao and Wuwu, Qingpo and Zhao, Zhongyu and Wu, Zhuangzhe and Huang, Nan and Lu, Ming and Ma, Ningning and Zhang, Shanghang}, title = {EMD: Explicit Motion Modeling for High-Quality Street Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28462-28472} }
MOVE: Motion-Guided Few-Shot Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Kaining and Hu, Hengrui and Ding, Henghui}, title = {MOVE: Motion-Guided Few-Shot Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11632-11642} }
Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Xu and Taketomi, Takafumi}, title = {Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27552-27562} }
CO2-Net: A Physics-Informed Spatio-Temporal Model for Global Surface CO2 Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Hao and Zheng, Yuting and Huang, Hanbo and Sun, Chaofan and Liao, Enhui and Liu, Lin and Han, Yi and Zhou, Hao and Liang, Shiyu}, title = {CO2-Net: A Physics-Informed Spatio-Temporal Model for Global Surface CO2 Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6220-6230} }
Multi-Modal Multi-Task Unified Embedding Model (M3T-UEM): A Task-Adaptive Representation Learning Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2025_ICCV, author = {Sharma, Rohan and Chen, Changyou and Chang, Feng-Ju and Yun, Seongjun and Xie, Xiaohu and Meng, Rui and Xu, Dehong and Mottini, Alejandro and Cui, Qingjun}, title = {Multi-Modal Multi-Task Unified Embedding Model (M3T-UEM): A Task-Adaptive Representation Learning Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22783-22793} }
Randomized Autoregressive Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Qihang and He, Ju and Deng, Xueqing and Shen, Xiaohui and Chen, Liang-Chieh}, title = {Randomized Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18431-18441} }
Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiangdong and Zhang, Shaofeng and Yan, Junchi}, title = {Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28696-28706} }
Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation and Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Yuanhao and Zhang, He and Zhang, Kai and Liang, Yixun and Ren, Mengwei and Luan, Fujun and Liu, Qing and Kim, Soo Ye and Zhang, Jianming and Zhang, Zhifei and Zhou, Yuqian and Zhang, Yulun and Yang, Xiaokang and Lin, Zhe and Yuille, Alan}, title = {Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25062-25072} }
Variance-Based Pruning for Accelerating and Compressing Trained Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berisha_2025_ICCV, author = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul}, title = {Variance-Based Pruning for Accelerating and Compressing Trained Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4973-4982} }
Learning Normal Flow Directly From Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Dehao and Burner, Levi and Wu, Jiayi and Liu, Minghui and Chen, Jingxi and Aloimonos, Yiannis and Ferm\"uller, Cornelia}, title = {Learning Normal Flow Directly From Events}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7969-7979} }
Federated Prompt-Tuning with Heterogeneous and Incomplete Multimodal Client Data-
[pdf]
[supp]
[bibtex]@InProceedings{Phung_2025_ICCV, author = {Phung, Thu Hang and Nguyen, Duong M. and Huynh, Thanh Trung and Nguyen, Quoc Viet Hung and Hoang, Trong Nghia and Le Nguyen, Phi}, title = {Federated Prompt-Tuning with Heterogeneous and Incomplete Multimodal Client Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3936-3946} }
Trace3D: Consistent Segmentation Lifting via Gaussian Instance Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Hongyu and Ni, Junfeng and Chen, Yixin and Li, Weishuo and Pei, Mingtao and Huang, Siyuan}, title = {Trace3D: Consistent Segmentation Lifting via Gaussian Instance Tracing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6656-6666} }
LMM-Det: Make Large Multimodal Models Excel in Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jincheng and Xie, Chunyu and Ao, Ji and Leng, Dawei and Yin, Yuhui}, title = {LMM-Det: Make Large Multimodal Models Excel in Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {308-318} }
Bootstrap3D: Improving Multi-view Diffusion Model with Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Zeyi and Wu, Tong and Zhang, Pan and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi}, title = {Bootstrap3D: Improving Multi-view Diffusion Model with Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15714-15726} }
Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meanti_2025_ICCV, author = {Meanti, Giacomo and Ryckeboer, Thomas and Arbel, Michael and Mairal, Julien}, title = {Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28364-28374} }
Towards Adversarial Robustness via Debiased High-Confidence Logit Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Kejia and Weng, Juanjuan and Li, Shaozi and Luo, Zhiming}, title = {Towards Adversarial Robustness via Debiased High-Confidence Logit Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2783-2792} }
FiffDepth: Feed-forward Transformation of Diffusion-Based Generators for Detailed Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_ICCV, author = {Bai, Yunpeng and Huang, Qixing}, title = {FiffDepth: Feed-forward Transformation of Diffusion-Based Generators for Detailed Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6023-6033} }
Reinforcement Learning-Guided Data Selection via Redundancy Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Suorong and Li, Peijia and Shen, Furao and Zhao, Jian}, title = {Reinforcement Learning-Guided Data Selection via Redundancy Assessment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1004-1015} }
Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jiayuan and Pham, Thai-Hoang and Wang, Yuanlong and Zhang, Ping}, title = {Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22846-22856} }
OuroMamba: A Data-Free Quantization Framework for Vision Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramachandran_2025_ICCV, author = {Ramachandran, Akshat and Lee, Mingyu and Xu, Huan and Kundu, Souvik and Krishna, Tushar}, title = {OuroMamba: A Data-Free Quantization Framework for Vision Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21177-21186} }
Optical Model-Driven Sharpness Mapping for Autofocus in Small Depth-of-Field and Severe Defocus Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Chen-Liang and Cao, Mingpei and Hung, Chih Chien and Zhu, Yuesheng}, title = {Optical Model-Driven Sharpness Mapping for Autofocus in Small Depth-of-Field and Severe Defocus Scenarios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6426-6435} }
DICE: Staleness-Centric Optimizations for Parallel Diffusion MoE Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Jiajun and Luo, Lizhuo and Xu, Jianru and Song, Jiajun and Lu, Rongwei and Tang, Chen and Wang, Zhi}, title = {DICE: Staleness-Centric Optimizations for Parallel Diffusion MoE Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15481-15490} }
Safeguarding Vision-Language Models: Mitigating Vulnerabilities to Gaussian Noise in Perturbation-based Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiawei and Zuo, Yushen and Chai, Yuanjun and Liu, Zhendong and Fu, Yicheng and Feng, Yichun and Lam, Kin-Man}, title = {Safeguarding Vision-Language Models: Mitigating Vulnerabilities to Gaussian Noise in Perturbation-based Attacks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2773-2782} }
SHeaP: Self-Supervised Head Geometry Predictor Learned via 2D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schoneveld_2025_ICCV, author = {Schoneveld, Liam and Chen, Zhe and Davoli, Davide and Tang, Jiapeng and Terazawa, Saimon and Nishino, Ko and Nie{\ss}ner, Matthias}, title = {SHeaP: Self-Supervised Head Geometry Predictor Learned via 2D Gaussians}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14162-14172} }
Pretrained Reversible Generation as Unsupervised Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Rongkun and Zhang, Jinouwen and Niu, Yazhe and Shen, Dazhong and Ma, Bingqi and Liu, Yu and Yang, Jing}, title = {Pretrained Reversible Generation as Unsupervised Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19216-19226} }
CompSlider: Compositional Slider for Disentangled Multiple-Attribute Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Zixin and Duarte, Kevin and Rizve, Mamshad Nayeem and Xu, Chengyuan and Kalarot, Ratheesh and Yuan, Junsong}, title = {CompSlider: Compositional Slider for Disentangled Multiple-Attribute Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16682-16691} }
MA-CIR: A Multimodal Arithmetic Benchmark for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Byun_2025_ICCV, author = {Byun, Jaeseok and Jang, Young Kyun and Jeong, Seokhyeon and Kim, Donghyun and Moon, Taesup}, title = {MA-CIR: A Multimodal Arithmetic Benchmark for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21342-21352} }
NAPPure: Adversarial Purification for Robust Image Classification under Non-Additive Perturbations-
[pdf]
[supp]
[bibtex]@InProceedings{Nan_2025_ICCV, author = {Nan, Junjie and Li, Jianing and Chen, Wei and Zhang, Mingkun and Cheng, Xueqi}, title = {NAPPure: Adversarial Purification for Robust Image Classification under Non-Additive Perturbations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2260-2269} }
Large-scale Pre-training for Grounded Video Caption Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kazakos_2025_ICCV, author = {Kazakos, Evangelos and Schmid, Cordelia and Sivic, Josef}, title = {Large-scale Pre-training for Grounded Video Caption Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24434-24444} }
MMReason: An Open-Ended Multi-Modal Multi-Step Reasoning Benchmark for MLLMs Toward AGI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Huanjin and Huang, Jiaxing and Qiu, Yawen and Chen, Michael K. and Liu, Wenzheng and Zhang, Wei and Zeng, Wenjie and Zhang, Xikun and Zhang, Jingyi and Song, YuXin and Wu, Wenhao and Tao, Dacheng}, title = {MMReason: An Open-Ended Multi-Modal Multi-Step Reasoning Benchmark for MLLMs Toward AGI}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {273-283} }
SimMLM: A Simple Framework for Multi-modal Learning with Missing Modality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Sijie and Chen, Chen and Han, Jungong}, title = {SimMLM: A Simple Framework for Multi-modal Learning with Missing Modality}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24068-24077} }
Dual Reciprocal Learning of Language-based Human Motion Understanding and Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Chen and Shi, Zhicheng and Wang, Wenguan and Yang, Yi}, title = {Dual Reciprocal Learning of Language-based Human Motion Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6252-6262} }
InteractAvatar: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Kefan and Mohan, Sreyas and Theiss, Justin and Oprea, Sergiu and Sridhar, Srinath and Prakash, Aayush}, title = {InteractAvatar: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable Gaussians}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10410-10420} }
LINR-PCGC: Lossless Implicit Neural Representations for Point Cloud Geometry Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Wenjie and Yang, Qi and Xia, Shuting and Huang, He and Xu, Yiling and Li, Zhu}, title = {LINR-PCGC: Lossless Implicit Neural Representations for Point Cloud Geometry Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28577-28586} }
RoCo-Sim: Enhancing Roadside Collaborative Perception through Foreground Simulation-
[pdf]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Yuwen and Hu, Anning and Chao, Zichen and Lu, Yifan and Ge, Junhao and Liu, Genjia and Wu, Weitao and Wang, Lanjun and Chen, Siheng}, title = {RoCo-Sim: Enhancing Roadside Collaborative Perception through Foreground Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26977-26986} }
UKBOB: One Billion MRI Labeled Masks for Generalizable 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourigault_2025_ICCV, author = {Bourigault, Emmanuelle and Jamaludin, Amir and Hamdi, Abdullah}, title = {UKBOB: One Billion MRI Labeled Masks for Generalizable 3D Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21600-21611} }
To Label or Not to Label: PALM - A Predictive Model for Evaluating Sample Efficiency in Active Learning Models-
[pdf]
[supp]
[bibtex]@InProceedings{Machnio_2025_ICCV, author = {Machnio, Julia and Nielsen, Mads and Ghazi, Mostafa Mehdipour}, title = {To Label or Not to Label: PALM - A Predictive Model for Evaluating Sample Efficiency in Active Learning Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4039-4048} }
GaussianOcc: Fully Self-supervised and Efficient 3D Occupancy Estimation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gan_2025_ICCV, author = {Gan, Wanshui and Liu, Fang and Xu, Hongbin and Mo, Ningkai and Yokoya, Naoto}, title = {GaussianOcc: Fully Self-supervised and Efficient 3D Occupancy Estimation with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28980-28990} }
Free-MoRef: Instantly Multiplexing Context Perception Capabilities of Video-MLLMs within Single Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Kuo and Zheng, Quanlong and Xie, Junlin and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Lin, Liang and Zhou, Fan and Li, Guanbin}, title = {Free-MoRef: Instantly Multiplexing Context Perception Capabilities of Video-MLLMs within Single Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22499-22508} }
DC-ControlNet: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Hongji and Han, Wencheng and Zhou, Yucheng and Shen, Jianbing}, title = {DC-ControlNet: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19065-19074} }
EfficientMT: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Yufei and Han, Hu and Wei, Yuxiang and Shan, Shiguang and Chen, Xilin}, title = {EfficientMT: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10592-10601} }
Dynamic Multimodal Prototype Learning in Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Xingyu and Wang, Shuo and Zhu, Beier and Li, Miaoge and Li, Yunfan and Fang, Junfeng and Wang, Zhicai and Wang, Dongsheng and Zhang, Hanwang}, title = {Dynamic Multimodal Prototype Learning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2501-2511} }
Liberated-GS: 3D Gaussian Splatting Independent from SfM Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Weihong and Zhang, Xiaoyu and Zhai, Hongjia and Xiang, Xiaojun and Jiang, Hanqing and Zhang, Guofeng}, title = {Liberated-GS: 3D Gaussian Splatting Independent from SfM Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26675-26685} }
Client2Vec: Improving Federated Learning by Distribution Shifts Aware Client Indexing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Yongxin and Wang, Lin and Tang, Xiaoying and Lin, Tao}, title = {Client2Vec: Improving Federated Learning by Distribution Shifts Aware Client Indexing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1433-1443} }
FuXi-RTM: A Physics-Guided Prediction Framework with Radiative Transfer Modeling-
[pdf]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Qiusheng and Zhong, Xiaohui and Fan, Xu and Li, Hao}, title = {FuXi-RTM: A Physics-Guided Prediction Framework with Radiative Transfer Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8852-8862} }
Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Dong and Luo, Chunhui and Bao, Yuanfei and Yang, Gang and Xiao, Jie and Fu, Xueyang and Zha, Zheng-Jun}, title = {Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10908-10918} }
Loss Functions for Predictor-based Neural Architecture Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan}, title = {Loss Functions for Predictor-based Neural Architecture Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1624-1633} }
Learning to Generalize without Bias for Open-Vocabulary Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Yating and Cao, Congqi and Zhang, Yifan and Zhang, Yanning}, title = {Learning to Generalize without Bias for Open-Vocabulary Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12800-12810} }
Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions-
[pdf]
[supp]
[bibtex]@InProceedings{Dages_2025_ICCV, author = {Dag{\`e}s, Thomas and Lindenbaum, Michael and Bruckstein, Alfred M.}, title = {Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13974-13984} }
SDMatte: Grafting Diffusion Models for Interactive Matting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Longfei and Liang, Yu and Zhang, Hao and Chen, Jinwei and Dong, Wei and Chen, Lunde and Liu, Wanyu and Li, Bo and Jiang, Peng-Tao}, title = {SDMatte: Grafting Diffusion Models for Interactive Matting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15229-15239} }
Scaling Inference-Time Search with Vision Value Model for Improved Visual Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xiyao and Yang, Zhengyuan and Li, Linjie and Lu, Hongjin and Xu, Yuancheng and Lin, Chung-Ching and Lin, Kevin and Huang, Furong and Wang, Lijuan}, title = {Scaling Inference-Time Search with Vision Value Model for Improved Visual Comprehension}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1173-1184} }
Cooperative Pseudo Labeling for Unsupervised Federated Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Kuangpu and Sheng, Lijun and Yu, Yongcan and Liang, Jian and Wang, Zilei and He, Ran}, title = {Cooperative Pseudo Labeling for Unsupervised Federated Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3326-3336} }
FreeDNA: Endowing Domain Adaptation of Diffusion-Based Dense Prediction with Training-Free Domain Noise Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Hang and Huang, Jie and Huang, Linjiang and Li, Dong and Liu, Yidi and Zhao, Feng}, title = {FreeDNA: Endowing Domain Adaptation of Diffusion-Based Dense Prediction with Training-Free Domain Noise Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3268-3279} }
DOLLAR: Few-Step Video Generation via Distillation and Latent Reward Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Zihan and Jin, Chi and Liu, Difan and Zheng, Haitian and Singh, Krishna Kumar and Zhang, Qiang and Kang, Yan and Lin, Zhe and Liu, Yuchen}, title = {DOLLAR: Few-Step Video Generation via Distillation and Latent Reward Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17961-17971} }
Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Yunshan and Zhou, Yuyao and Zhang, Yuxin and Sui, Wanchen and Li, Shen and Li, Yong and Chao, Fei and Ji, Rongrong}, title = {Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12479-12490} }
Zero-AVSR: Zero-Shot Audio-Visual Speech Recognition with LLMs by Learning Language-Agnostic Speech Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Yeo_2025_ICCV, author = {Yeo, Jeong Hun and Kim, Minsu and Kim, Chae Won and Petridis, Stavros and Ro, Yong Man}, title = {Zero-AVSR: Zero-Shot Audio-Visual Speech Recognition with LLMs by Learning Language-Agnostic Speech Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6693-6703} }
Supervised Exploratory Learning for Long-Tailed Visual Recognition-
[pdf]
[bibtex]@InProceedings{Jian_2025_ICCV, author = {Jian, Zhongquan and Chen, Yanhao and Wang, Yancheng and Yao, Junfeng and Wang, Meihong and Wu, Qingqiang}, title = {Supervised Exploratory Learning for Long-Tailed Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1870-1880} }
Enhanced Event-based Dense Stereo via Cross-Sensor Knowledge Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Haihao and Zhang, Yunjian and Li, Jianing and Zhu, Lin and Lv, Meng and Zhu, Yao and Liu, Yanwei and Ji, Xiangyang}, title = {Enhanced Event-based Dense Stereo via Cross-Sensor Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5437-5447} }
Disentangled World Models: Learning to Transfer Semantic Knowledge from Distracting Videos for Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Qi and Zhang, Zhipeng and Xie, Baao and Jin, Xin and Wang, Yunbo and Wang, Shiyu and Zheng, Liaomo and Yang, Xiaokang and Zeng, Wenjun}, title = {Disentangled World Models: Learning to Transfer Semantic Knowledge from Distracting Videos for Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2599-2608} }
StreamDiffusion: A Pipeline-level Solution for Real-Time Interactive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kodaira_2025_ICCV, author = {Kodaira, Akio and Xu, Chenfeng and Hazama, Toshiki and Yoshimoto, Takanori and Ohno, Kohei and Mitsuhori, Shogo and Sugano, Soichi and Cho, Hanying and Liu, Zhijian and Tomizuka, Masayoshi and Keutzer, Kurt}, title = {StreamDiffusion: A Pipeline-level Solution for Real-Time Interactive Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12371-12380} }
Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Xuan and Zhao, Yihao and Ge, Yanhao and You, Mingyu}, title = {Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16938-16947} }
When Large Vision-Language Model Meets Large Remote Sensing Imagery: Coarse-to-Fine Text-Guided Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Junwei and Zhang, Yingying and Yang, Xue and Wu, Kang and Zhu, Qi and Liang, Lei and Chen, Jingdong and Li, Yansheng}, title = {When Large Vision-Language Model Meets Large Remote Sensing Imagery: Coarse-to-Fine Text-Guided Token Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9206-9217} }
StreamMind: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Xin and Wu, Hao and Yang, Yifan and Jiang, Shiqi and Zhang, Qianxi and Bai, Donglin and Chen, Zhibo and Cao, Ting}, title = {StreamMind: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13448-13459} }
DecAD: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xiaolei and Wang, Xiaoyang and Bai, Huihui and Lim, Eng Gee and Xiao, Jimin}, title = {DecAD: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21568-21577} }
Discovering Divergent Representations between Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dunlap_2025_ICCV, author = {Dunlap, Lisa and Gonzalez, Joseph E. and Darrell, Trevor and Heilbron, Fabian Caba and Sivic, Josef and Russell, Bryan}, title = {Discovering Divergent Representations between Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17516-17525} }
Kaputt: A Large-Scale Dataset for Visual Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Hofer_2025_ICCV, author = {H{\"o}fer, Sebastian and Henning, Dorian F. and Amiranashvili, Artemij and Morrison, Douglas and Tzes, Mariliza and Posner, Ingmar and Matvienko, Marc and Rennola, Alessandro and Milan, Anton}, title = {Kaputt: A Large-Scale Dataset for Visual Defect Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24224-24233} }
Self-Ensembling Gaussian Splatting for Few-Shot Novel View Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Chen and Wang, Xuan and Zhang, Tong and Javed, Saqib and Salzmann, Mathieu}, title = {Self-Ensembling Gaussian Splatting for Few-Shot Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4940-4950} }
Spectral Image Tokenizer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Esteves_2025_ICCV, author = {Esteves, Carlos and Suhail, Mohammed and Makadia, Ameesh}, title = {Spectral Image Tokenizer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17181-17190} }
GameFactory: Creating New Games with Generative Interactive Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Jiwen and Qin, Yiran and Wang, Xintao and Wan, Pengfei and Zhang, Di and Liu, Xihui}, title = {GameFactory: Creating New Games with Generative Interactive Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11590-11599} }
Occupancy Learning with Spatiotemporal Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2025_ICCV, author = {Leng, Ziyang and Yang, Jiawei and Yi, Wenlong and Zhou, Bolei}, title = {Occupancy Learning with Spatiotemporal Memory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26569-26578} }
Understanding Co-speech Gestures in-the-wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hegde_2025_ICCV, author = {Hegde, Sindhu B and Prajwal, K R and Kwon, Taein and Zisserman, Andrew}, title = {Understanding Co-speech Gestures in-the-wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9977-9987} }
CAD-Recode: Reverse Engineering CAD Code from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Rukhovich_2025_ICCV, author = {Rukhovich, Danila and Dupont, Elona and Mallis, Dimitrios and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila}, title = {CAD-Recode: Reverse Engineering CAD Code from Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9801-9811} }
ObjectRelator: Enabling Cross-View Object Relation Understanding Across Ego-Centric and Exo-Centric Perspectives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Yuqian and Wang, Runze and Ren, Bin and Sun, Guolei and Gong, Biao and Fu, Yanwei and Paudel, Danda Pani and Huang, Xuanjing and Van Gool, Luc}, title = {ObjectRelator: Enabling Cross-View Object Relation Understanding Across Ego-Centric and Exo-Centric Perspectives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6530-6540} }
Tune-Your-Style: Intensity-tunable 3D Style Transfer with Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Yian and Ye, Rushi and Zheng, Ruochong and Cheng, Zesen and Feng, Chaoran and Yang, Jiashu and Qiao, Pengchong and Liu, Chang and Chen, Jie}, title = {Tune-Your-Style: Intensity-tunable 3D Style Transfer with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19032-19042} }
Stylized-Face: A Million-level Stylized Face Dataset for Face Recognition-
[pdf]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Zhengyuan and Xu, Jianqing and Huang, Yuge and Hao, Jinkun and Ding, Shouhong and Zhang, Zhizhong and Tan, Xin and Ma, Lizhuang}, title = {Stylized-Face: A Million-level Stylized Face Dataset for Face Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13053-13064} }
Bring Your Rear Cameras for Egocentric 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akada_2025_ICCV, author = {Akada, Hiroyasu and Wang, Jian and Golyanik, Vladislav and Theobalt, Christian}, title = {Bring Your Rear Cameras for Egocentric 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9497-9507} }
ReferEverything: Towards Segmenting Everything We Can Speak of in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bagchi_2025_ICCV, author = {Bagchi, Anurag and Bao, Zhipeng and Wang, Yu-Xiong and Tokmakov, Pavel and Hebert, Martial}, title = {ReferEverything: Towards Segmenting Everything We Can Speak of in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23221-23231} }
GestureHYDRA: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Quanwei and Huang, Luying and Wang, Kaisiyuan and Guan, Jiazhi and He, Shengyi and Li, Fengguo and Zhou, Hang and Yu, Lingyun and Li, Yingying and Feng, Haocheng and Xie, Hongtao}, title = {GestureHYDRA: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12615-12625} }
Make Your Training Flexible: Towards Deployment-Efficient Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chenting and Li, Kunchang and Jiang, Tianxiang and Zeng, Xiangyu and Wang, Yi and Wang, Limin}, title = {Make Your Training Flexible: Towards Deployment-Efficient Video Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23880-23891} }
Federated Continual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Haiyang and Zeng, Fanhu and Zhu, Fei and Liu, Wenzhuo and Wang, Da-Han and Xu, Jian and Zhang, Xu-Yao and Liu, Cheng-Lin}, title = {Federated Continual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1325-1335} }
HUST: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Ran_2025_ICCV, author = {Ran, Zimin and Ren, Xingyu and An, Xiang and Yang, Kaicheng and Feng, Ziyong and Yang, Jing and Potamias, Rolandos Alexandros and Zhu, Linchao and Deng, Jiankang}, title = {HUST: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13523-13532} }
DLFR-Gen: Diffusion-based Video Generation with Dynamic Latent Frame Rate-
[pdf]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Zhihang and Xie, Rui and Shang, Yuzhang and Zhang, Hanling and Wang, Siyuan and Yan, Shengen and Dai, Guohao and Wang, Yu}, title = {DLFR-Gen: Diffusion-based Video Generation with Dynamic Latent Frame Rate}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16410-16419} }
What's in a Latent? Leveraging Diffusion Latent Space for Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Thomas_2025_ICCV, author = {Thomas, Xavier and Ghadiyaram, Deepti}, title = {What's in a Latent? Leveraging Diffusion Latent Space for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2183-2194} }
DiffSim: Taming Diffusion Models for Evaluating Visual Similarity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV,
  author    = {Song, Yiren and Liu, Xiaokang and Shou, Mike Zheng},
  title     = {{DiffSim}: Taming Diffusion Models for Evaluating Visual Similarity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16904--16915},
}
Controllable Feature Whitening for Hyperparameter-Free Bias Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Yooshin and Cho, Hanbyel and Lee, Janghyeon and Hong, HyeongGwon and Ahn, Jaesung and Kim, Junmo},
  title     = {Controllable Feature Whitening for Hyperparameter-Free Bias Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4550--4560},
}
iManip: Skill-Incremental Learning for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zexin and Cai, Jia-Feng and Wu, Xiao-Ming and Wei, Yi-Lin and Tang, Yu-Ming and Wu, Ancong and Zheng, Wei-Shi},
  title     = {{iManip}: Skill-Incremental Learning for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13890--13900},
}
RadarSplat: Radar Gaussian Splatting for High-Fidelity Data Synthesis and 3D Reconstruction of Autonomous Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Pou-Chun and Harisha, Skanda and Vasudevan, Ram and Eid, Aline and Skinner, Katherine A.},
  title     = {{RadarSplat}: Radar {Gaussian} Splatting for High-Fidelity Data Synthesis and {3D} Reconstruction of Autonomous Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27596--27606},
}
Hierarchical 3D Scene Graphs Construction Outdoors-
[pdf]
[supp]
[bibtex]@InProceedings{Nyffeler_2025_ICCV,
  author    = {Nyffeler, Jon and Tombari, Federico and Barath, Daniel},
  title     = {Hierarchical {3D} Scene Graphs Construction Outdoors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26817--26826},
}
SIC: Similarity-Based Interpretable Image Classification with Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wolf_2025_ICCV,
  author    = {Wolf, Tom Nuno and Kavak, Emre and Bongratz, Fabian and Wachinger, Christian},
  title     = {{SIC}: Similarity-Based Interpretable Image Classification with Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24276--24285},
}
Towards Cross-modal Backward-compatible Representation Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_ICCV,
  author    = {Jang, Young Kyun and Lim, Ser-nam},
  title     = {Towards Cross-modal Backward-compatible Representation Learning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1783--1792},
}
Fuse Before Transfer: Knowledge Fusion for Heterogeneous Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Guopeng and Wang, Qiang and Yan, Ke and Ding, Shouhong and Gao, Yuan and Xia, Gui-Song},
  title     = {Fuse Before Transfer: Knowledge Fusion for Heterogeneous Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3445--3454},
}
CoLMDriver: LLM-based Negotiation Benefits Cooperative Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Changxing and Liu, Genjia and Wang, Zijun and Yang, Jinchang and Chen, Siheng},
  title     = {{CoLMDriver}: {LLM}-based Negotiation Benefits Cooperative Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25951--25960},
}
Dual Domain Control via Active Learning for Remote Sensing Domain Incremental Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Jiachen and Cheng, De and Yang, Xi and Wang, Nannan},
  title     = {Dual Domain Control via Active Learning for Remote Sensing Domain Incremental Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3809--3818},
}
VideoSetDiff: Identifying and Reasoning Similarities and Differences in Similar Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Yue and Sun, Yanjun and Yagi, Takuma and Egami, Shusaku and Miyata, Natsuki and Fukuda, Ken and Hara, Kensho and Sagawa, Ryusuke},
  title     = {{VideoSetDiff}: Identifying and Reasoning Similarities and Differences in Similar Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12242--12252},
}
GRAB: A Challenging GRaph Analysis Benchmark for Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roberts_2025_ICCV,
  author    = {Roberts, Jonathan and Han, Kai and Albanie, Samuel},
  title     = {{GRAB}: A Challenging {GRaph} Analysis Benchmark for Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1644--1654},
}
Transparent Vision: A Theory of Hierarchical Invariant Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2025_ICCV,
  author    = {Qi, Shuren and Zhang, Yushu and Wang, Chao and Xia, Zhihua and Cao, Xiaochun and Fan, Fenglei},
  title     = {Transparent Vision: A Theory of Hierarchical Invariant Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3435--3444},
}
Multi-View 3D Point Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Rajic_2025_ICCV,
  author    = {Raji{\v{c}}, Frano and Xu, Haofei and Mihajlovic, Marko and Li, Siyuan and Demir, Irem and G{\"u}ndo{\u{g}}du, Emircan and Ke, Lei and Prokudin, Sergey and Pollefeys, Marc and Tang, Siyu},
  title     = {Multi-View {3D} Point Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {59--68},
}
InsideOut: Integrated RGB-Radiative Gaussian Splatting for Comprehensive 3D Object Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungmin and Hong, Seonghyuk and Lee, Juyong and Lee, Jaeyoon and Choi, Jongwon},
  title     = {{InsideOut}: Integrated {RGB}-Radiative {Gaussian} Splatting for Comprehensive {3D} Object Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25820--25830},
}
Splat-LOAM: Gaussian Splatting LiDAR Odometry and Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Giacomini_2025_ICCV,
  author    = {Giacomini, Emanuele and Di Giammarino, Luca and De Rebotti, Lorenzo and Grisetti, Giorgio and Oswald, Martin R.},
  title     = {{Splat-LOAM}: {Gaussian} Splatting {LiDAR} Odometry and Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27630--27639},
}
LUSD: Localized Update Score Distillation for Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chinchuthakun_2025_ICCV,
  author    = {Chinchuthakun, Worameth and Saengja, Tossaporn and Tritrong, Nontawat and Rewatbowornwong, Pitchaporn and Khungurn, Pramook and Suwajanakorn, Supasorn},
  title     = {{LUSD}: Localized Update Score Distillation for Text-Guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15298--15307},
}
ChartCap: Mitigating Hallucination of Dense Chart Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Junyoung and Ahn, Jaewoo and Kim, Gunhee},
  title     = {{ChartCap}: Mitigating Hallucination of Dense Chart Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13171--13182},
}
RTMap: Real-Time Recursive Mapping with Change Detection and Localization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuheng and Yang, Sheng and Wang, Lingxuan and Hou, Zhenghua and Cai, Chengying and Tan, Zhitao and Chen, Mingxia and Huang, Shi-Sheng and Li, Qiang},
  title     = {{RTMap}: Real-Time Recursive Mapping with Change Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28021--28030},
}
Scaling 3D Compositional Models for Robust Classification and Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Xiaoding and Zhang, Guofeng and Kaushik, Prakhar and Jesslen, Artur and Kortylewski, Adam and Yuille, Alan},
  title     = {Scaling {3D} Compositional Models for Robust Classification and Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6406--6415},
}
MultiVerse: A Multi-Turn Conversation Benchmark for Evaluating Large Vision and Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Young-Jun and Lee, Byung-Kwan and Zhang, Jianshu and Hwang, Yechan and Ko, Byungsoo and Kim, Han-Gyu and Yao, Dongyu and Rong, Xuankun and Joo, Eojin and Han, Seung-Ho and Ko, Bowon and Choi, Ho-Jin},
  title     = {{MultiVerse}: A Multi-Turn Conversation Benchmark for Evaluating Large Vision and Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {708--719},
}
Contact-Aware Refinement of Human Pose Pseudo-Ground Truth via Bioimpedance Sensing-
[pdf]
[bibtex]@InProceedings{Forte_2025_ICCV,
  author    = {Forte, Maria-Paola and Athanasiou, Nikos and Ballardini, Giulia and Bartels, Jan Ulrich and Kuchenbecker, Katherine J. and Black, Michael J.},
  title     = {Contact-Aware Refinement of Human Pose Pseudo-Ground Truth via Bioimpedance Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5071--5080},
}
Embodied VideoAgent: Persistent Memory from Egocentric Videos and Embodied Sensors Enables Dynamic Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Yue and Ma, Xiaojian and Su, Rongpeng and Guo, Jun and Wu, Rujie and Chen, Xi and Li, Qing},
  title     = {Embodied {VideoAgent}: Persistent Memory from Egocentric Videos and Embodied Sensors Enables Dynamic Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6342--6352},
}
CuRe: Cultural Gaps in the Long Tail of Text-to-Image Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rege_2025_ICCV,
  author    = {Rege, Aniket and Nie, Zinnia and Ramesh, Mahesh and Raskar, Unmesh and Yu, Zhuoran and Kusupati, Aditya and Lee, Yong Jae and Vinayak, Ramya Korlakai},
  title     = {{CuRe}: Cultural Gaps in the Long Tail of Text-to-Image Systems},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15680--15691},
}
Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeon_2025_ICCV,
  author    = {Jeon, Seogkyu and Hong, Kibeom and Byun, Hyeran},
  title     = {Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20791--20801},
}
OVG-HQ: Online Video Grounding with Hybrid-modal Queries-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Runhao and Mao, Jiaqi and Lai, Minghao and Phan, Minh Hieu and Dong, Yanjie and Wang, Wei and Chen, Qi and Hu, Xiping},
  title     = {{OVG-HQ}: Online Video Grounding with Hybrid-modal Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21085--21096},
}
ConformalSAM: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Danhui and Liu, Ziquan and Yang, Chuxi and Wang, Dan and Yan, Yan and Xu, Yi and Ji, Xiangyang},
  title     = {{ConformalSAM}: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24045--24055},
}
V2PE: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Junqi and Chen, Ziyi and Lin, Jintao and Zhu, Jinguo and Liu, Xihui and Dai, Jifeng and Zhu, Xizhou},
  title     = {{V2PE}: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21070--21084},
}
PRO-VPT: Distribution-Adaptive Visual Prompt Tuning via Prompt Relocation-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2025_ICCV,
  author    = {Shang, Chikai and Li, Mengke and Zhang, Yiqun and Chen, Zhen and Wu, Jinlin and Gu, Fangqing and Lu, Yang and Cheung, Yiu-Ming},
  title     = {{PRO-VPT}: Distribution-Adaptive Visual Prompt Tuning via Prompt Relocation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1558--1568},
}
CoMPaSS: Enhancing Spatial Understanding in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Gaoyang and Fu, Bingtao and Fan, Qingnan and Zhang, Qi and Liu, Runxing and Gu, Hong and Zhang, Huaqi and Liu, Xinguo},
  title     = {{CoMPaSS}: Enhancing Spatial Understanding in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15253--15265},
}
AdaDrive: Self-Adaptive Slow-Fast System for Language-Grounded Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruifei and Xie, Junlin and Zhang, Wei and Chen, Weikai and Tan, Xiao and Wan, Xiang and Li, Guanbin},
  title     = {{AdaDrive}: Self-Adaptive Slow-Fast System for Language-Grounded Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5112--5121},
}
Multi-modal Segment Anything Model for Camouflaged Scene Segmentation-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Guangyu and Liu, Hengyan and Lazarou, Michalis and Stathaki, Tania},
  title     = {Multi-modal Segment Anything Model for Camouflaged Scene Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19882--19892},
}
LLaVA-PruMerge: Adaptive Token Reduction for Efficient Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2025_ICCV,
  author    = {Shang, Yuzhang and Cai, Mu and Xu, Bingxin and Lee, Yong Jae and Yan, Yan},
  title     = {{LLaVA-PruMerge}: Adaptive Token Reduction for Efficient Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22857--22867},
}
Event-boosted Deformable 3D Gaussians for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wenhao and Weng, Wenming and Zhang, Yueyi and Xu, Ruikang and Xiong, Zhiwei},
  title     = {Event-boosted Deformable {3D} {Gaussians} for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28334--28343},
}
Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Jiawei and Liang, Siyuan and Lou, Tianrui and Zhang, Ming and Li, Wenjin and Fan, Dunqiu and Cao, Xiaochun},
  title     = {Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13880--13889},
}
Stereo Any Video: Temporally Consistent Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jing_2025_ICCV,
  author    = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {Stereo Any Video: Temporally Consistent Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20836--20846},
}
ASCENT: Annotation-free Self-supervised Contrastive Embeddings for 3D Neuron Tracking in Fluorescence Microscopy-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2025_ICCV,
  author    = {Han, Haejun and Lu, Hang},
  title     = {{ASCENT}: Annotation-free Self-supervised Contrastive Embeddings for {3D} Neuron Tracking in Fluorescence Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14676--14687},
}
ViLU: Learning Vision-Language Uncertainties for Failure Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Lafon_2025_ICCV,
  author    = {Lafon, Marc and Karmim, Yannis and Silva-Rodr{\'\i}guez, Julio and Couairon, Paul and Rambour, Cl{\'e}ment and Fournier-Sniehotta, Raphael and Ben Ayed, Ismail and Dolz, Jose and Thome, Nicolas},
  title     = {{ViLU}: Learning Vision-Language Uncertainties for Failure Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17807--17817},
}
A Linear N-Point Solver for Structure and Motion from Asynchronous Tracks-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV,
  author    = {Su, Hang and Feng, Yunlong and Gehrig, Daniel and Jiang, Panfeng and Gao, Ling and Lagorce, Xavier and Kneip, Laurent},
  title     = {A Linear {N}-Point Solver for Structure and Motion from Asynchronous Tracks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4339--4348},
}
Constraint-Aware Feature Learning for Parametric Point Cloud-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Xi and Lei, Ruiqi and Huang, Di and Liao, Zhichao and Piao, Fengyuan and Chen, Yan and Feng, Pingfa and Zeng, Long},
  title     = {Constraint-Aware Feature Learning for Parametric Point Cloud},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28114--28124},
}
Unleashing Vecset Diffusion Model for Fast Shape Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_ICCV,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Liu, Haolin and Wang, Fuyun and Shi, Huiwen and Yang, Xianghui and Lin, Qingxiang and Huang, Jingwei and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Yue, Xiangyu},
  title     = {Unleashing Vecset Diffusion Model for Fast Shape Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2523--2533},
}
Revisiting Point Cloud Completion: Are We Ready For The Real-World?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathak_2025_ICCV,
  author    = {Pathak, Stuti and Kumar, Prashant and Baiju, Dheeraj and Mboga, Nicholus and Steenackers, Gunther and Penne, Rudi},
  title     = {Revisiting Point Cloud Completion: Are We Ready For The Real-World?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25388--25398},
}
RayZer: A Self-supervised Large View Synthesis Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Hanwen and Tan, Hao and Wang, Peng and Jin, Haian and Zhao, Yue and Bi, Sai and Zhang, Kai and Luan, Fujun and Sunkavalli, Kalyan and Huang, Qixing and Pavlakos, Georgios},
  title     = {{RayZer}: A Self-supervised Large View Synthesis Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4918--4929},
}
Perception-as-Control: Fine-grained Controllable Image Animation with 3D-aware Motion Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingjie and Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng},
  title     = {Perception-as-Control: Fine-grained Controllable Image Animation with {3D}-aware Motion Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14380--14389},
}
Structure-aware Semantic Discrepancy and Consistency for 3D Medical Image Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Tan and Tan, Zhaorui and Guo, Kaiyu and Xu, Dongli and Xu, Weidi and Jiang, Chen and Guo, Xin and Qi, Yuan and Cheng, Yuan},
  title     = {Structure-aware Semantic Discrepancy and Consistency for {3D} Medical Image Self-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20257--20267},
}
Do It Yourself: Learning Semantic Correspondence from Pseudo-Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Dunkel_2025_ICCV,
  author    = {D{\"u}nkel, Olaf and Wimmer, Thomas and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam},
  title     = {Do It Yourself: Learning Semantic Correspondence from Pseudo-Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5834--5844},
}
SA-LUT: Spatial Adaptive 4D Look-Up Table for Photorealistic Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Zerui and Wu, Zhonghua and Tao, Qingyi and Li, Qinyue and Loy, Chen Change},
  title     = {{SA-LUT}: Spatial Adaptive {4D} Look-Up Table for Photorealistic Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18294--18303},
}
DuET: Dual Incremental Object Detection via Exemplar-Free Task Arithmetic-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Monga_2025_ICCV,
  author    = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Banerjee, Biplab},
  title     = {{DuET}: Dual Incremental Object Detection via Exemplar-Free Task Arithmetic},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3121--3131},
}
BATCLIP: Bimodal Online Test-Time Adaptation for CLIP-
[pdf]
[supp]
[bibtex]@InProceedings{Maharana_2025_ICCV,
  author    = {Maharana, Sarthak and Zhang, Baoming and Karlinsky, Leonid and Feris, Rogerio and Guo, Yunhui},
  title     = {{BATCLIP}: Bimodal Online Test-Time Adaptation for {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1569--1579},
}
MagShield: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Yunzhe and Yi, Xinyu and Yin, Lu and Guo, Shihui and Yong, Junhai and Xu, Feng},
  title     = {{MagShield}: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29021--29030},
}
SHIFT: Smoothing Hallucinations by Information Flow Tuning for Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Liu, Enci and Li, Jianing and Liu, Yanwei and Ji, Xiangyang},
  title     = {{SHIFT}: Smoothing Hallucinations by Information Flow Tuning for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3639--3649},
}
Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Xianglin and Wang, Xiaoyang and Zhang, Zhen and Xiao, Jimin},
  title     = {Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21321--21330},
}
DSO: Aligning 3D Generators with Simulation Feedback for Physical Soundness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruining and Zheng, Chuanxia and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{DSO}: Aligning {3D} Generators with Simulation Feedback for Physical Soundness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6772--6783},
}
Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Congyi and Guan, Jian and Zhao, Xuanjia and Xu, Dongli and Lin, Youtian and Ye, Tong and Feng, Pengming and Pan, Haiwei},
  title     = {Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13193--13202},
}
Stable Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haiming and Xu, Yangyang and Xu, Chenshu and Shen, Tingrui and Liu, Wenxi and Du, Yong and Yu, Jun and He, Shengfeng},
  title     = {Stable Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16597--16606},
}
UAVScenes: A Multi-Modal Dataset for UAVs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sijie and Li, Siqi and Zhang, Yawei and Yu, Shangshu and Yuan, Shenghai and She, Rui and Guo, Quanjiang and Zheng, JinXuan and Howe, Ong Kang and Chandra, Leonrich and Srijeyan, Shrivarshann and Sivadas, Aditya and Aggarwal, Toshan and Liu, Heyuan and Zhang, Hongming and Chen, Chujie and Jiang, Junyu and Xie, Lihua and Tay, Wee Peng},
  title     = {{UAVScenes}: A Multi-Modal Dataset for {UAVs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28946--28958},
}
FoundIR: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Hao and Chen, Xiang and Dong, Jiangxin and Tang, Jinhui and Pan, Jinshan},
  title     = {{FoundIR}: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12626--12636},
}
LightsOut: Diffusion-based Outpainting for Enhanced Lens Flare Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Shr-Ruei and Chang, Wei-Cheng and Lee, Jie-Ying and Su, Chih-Hai and Liu, Yu-Lun},
  title     = {{LightsOut}: Diffusion-based Outpainting for Enhanced Lens Flare Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6353--6363},
}
TPG-INR: Target Prior-Guided Implicit 3D CT Reconstruction for Enhanced Sparse-view Imaging-
[pdf]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Qinglei and Tang, Ziyao and Tang, Xiaoqin},
  title     = {{TPG-INR}: Target Prior-Guided Implicit {3D} {CT} Reconstruction for Enhanced Sparse-view Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28239--28248},
}
From Holistic to Localized: Local Enhanced Adapters for Efficient Visual Instruction Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2025_ICCV,
  author    = {Jiao, Pengkun and Zhu, Bin and Chen, Jingjing and Ngo, Chong-Wah and Jiang, Yu-Gang},
  title     = {From Holistic to Localized: Local Enhanced Adapters for Efficient Visual Instruction Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2728--2737},
}
Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shanlin and Wang, Yifan and Zhang, Hanwen and Xiong, Yifeng and Ren, Qin and Fang, Ruogu and Xie, Xiaohui and You, Chenyu},
  title     = {Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10386--10397},
}
Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuan-Fu and Hsiao, Hsiu-Hui},
  title     = {Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15288--15297},
}
S4M: Boosting Semi-Supervised Instance Segmentation with SAM-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2025_ICCV,
  author    = {Yoon, Heeji and Shin, Heeseong and Hong, Eunbeen and Choi, Hyunwook and Cho, Hansang and Jeong, Daun and Kim, Seungryong},
  title     = {{S4M}: Boosting Semi-Supervised Instance Segmentation with {SAM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20226--20236},
}
Debiased Curriculum Adaptation for Safe Transfer Learning in Chest X-ray Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mingyang and Chen, Xinyang and Shu, Yang and Li, Xiucheng and Guan, Weili and Nie, Liqiang},
  title     = {Debiased Curriculum Adaptation for Safe Transfer Learning in Chest {X-ray} Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22610--22619},
}
Diorama: Unleashing Zero-shot Single-view 3D Indoor Scene Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qirui and Iliash, Denys and Ritchie, Daniel and Savva, Manolis and Chang, Angel X.},
  title     = {Diorama: Unleashing Zero-shot Single-view {3D} Indoor Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8896--8907},
}
Boosting Multi-View Indoor 3D Object Detection via Adaptive 3D Volume Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Runmin and Yu, Zhu and Cao, Si-Yuan and Zhu, Lingyu and Zhang, Guangyi and Bai, Xiaokai and Shen, Hui-Liang}, title = {Boosting Multi-View Indoor 3D Object Detection via Adaptive 3D Volume Construction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5980-5989} }
VQ-VLA: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yating and Zhu, Haoyi and Liu, Mingyu and Yang, Jiange and Fang, Hao-Shu and He, Tong}, title = {VQ-VLA: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11089-11099} }
Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ziyu and Huang, Binbin and Jiang, Hanqing and Zhou, Liyang and Xiang, Xiaojun and Shen, Shuhan}, title = {Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28260-28270} }
UDC-VIT: A Real-World Video Dataset for Under-Display Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Ahn_2025_ICCV, author = {Ahn, Kyusu and Kim, JiSoo and Lee, Sangik and Lee, HyunGyu and Ko, Byeonghyun and Park, Chanwoo and Lee, Jaejin}, title = {UDC-VIT: A Real-World Video Dataset for Under-Display Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10950-10960} }
Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuxiao and Lei, Yu and Wei, Zhenao and Xue, Weiying and Jiang, Xinyu and Zhuang, Nan and Liu, Qi}, title = {Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23636-23645} }
Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Kaining and Ding, Henghui and Jie, Guangquan and Jiang, Yu-Gang}, title = {Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22575-22585} }
Gait-X: Exploring X modality for Generalized Gait Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zengbin and Hou, Saihui and Li, Junjie and Liu, Xu and Cao, Chunshui and Huang, Yongzhen and Wang, Siye and Zhang, Man}, title = {Gait-X: Exploring X modality for Generalized Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13259-13269} }
Learning Separable Fine-Grained Representation via Dendrogram Construction from Coarse Labels for Fine-grained Visual Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Guanghui and Liang, Xuefeng and Li, Wenjie and Lin, Xiaoyu}, title = {Learning Separable Fine-Grained Representation via Dendrogram Construction from Coarse Labels for Fine-grained Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {870-879} }
DyGS-SLAM: Real-Time Accurate Localization and Gaussian Reconstruction for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Xinggang and Zhang, Chenyangguang and Zhao, Mingyuan and Gui, Yuanze and Zhang, Xiangkui and Ji, Xiangyang}, title = {DyGS-SLAM: Real-Time Accurate Localization and Gaussian Reconstruction for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9561-9571} }
D3QE: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yanran and Yu, Bingyao and Zheng, Yu and Zheng, Wenzhao and Duan, Yueqi and Chen, Lei and Zhou, Jie and Lu, Jiwen}, title = {D3QE: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16292-16301} }
Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Zhaorui and Yang, Xi and Pan, Tan and Liu, Tianyi and Jiang, Chen and Guo, Xin and Wang, Qiufeng and Nguyen, Anh and Qi, Yuan and Huang, Kaizhu and Cheng, Yuan}, title = {Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21895-21905} }
StrandHead: Text to Hair-Disentangled 3D Head Avatars Using Human-Centric Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Xiaokun and Cai, Zeyu and Tai, Ying and Yang, Jian and Zhang, Zhenyu}, title = {StrandHead: Text to Hair-Disentangled 3D Head Avatars Using Human-Centric Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13393-13404} }
MCAM: Multimodal Causal Analysis Model for Ego-Vehicle-Level Driving Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Tongtong and Li, Rongzhen and Xiong, Yixin and Zhang, Tao and Wang, Jing and Liu, Kai}, title = {MCAM: Multimodal Causal Analysis Model for Ego-Vehicle-Level Driving Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5479-5489} }
Adversarial Robust Memory-Based Continual Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2025_ICCV, author = {Mi, Xiaoyue and Tang, Fan and Yang, Zonghan and Wang, Danding and Cao, Juan and Li, Peng and Liu, Yang}, title = {Adversarial Robust Memory-Based Continual Learner}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {562-572} }
SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Zihui and Bian, Jia-Wang and Lin, Guosheng and Chen, Hao and Shen, Chunhua}, title = {SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28525-28534} }
TransiT: Transient Transformer for Non-line-of-sight Videography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ruiqian and Shen, Siyuan and Xia, Suan and Wang, Ziheng and Peng, Xingyue and Song, Chengxuan and Zhu, Yingsheng and Wu, Tao and Li, Shiying and Yu, Jingyi}, title = {TransiT: Transient Transformer for Non-line-of-sight Videography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27542-27551} }
Penalizing Boundary Activation for Object Completeness in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Haoyang and Zhao, Tianhao and Yang, Sibei and Lin, Yutian}, title = {Penalizing Boundary Activation for Object Completeness in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14962-14972} }
DMQ: Dissecting Outliers of Diffusion Models for Post-Training Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Dongyeun and Hur, Jiwan and Shon, Hyounguk and Lee, Jae Young and Kim, Junmo}, title = {DMQ: Dissecting Outliers of Diffusion Models for Post-Training Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18510-18520} }
QK-Edit: Revisiting Attention-based Injection in MM-DiT for Image and Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Tiancheng and Huang, Zilong and Li, Xiangtai and Lin, Zhijie and Liu, Jiyang and Wang, Yitong and Feng, Jiashi and Yang, Ming-Hsuan and Liew, Jun Hao}, title = {QK-Edit: Revisiting Attention-based Injection in MM-DiT for Image and Video Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19043-19053} }
Visual Chronicles: Using Multimodal LLMs to Analyze Massive Collections of Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Boyang and Peng, Songyou and Genova, Kyle and Wetzstein, Gordon and Snavely, Noah and Guibas, Leonidas and Funkhouser, Thomas}, title = {Visual Chronicles: Using Multimodal LLMs to Analyze Massive Collections of Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12769-12778} }
SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stotko_2025_ICCV, author = {Stotko, David and Klein, Reinhard}, title = {SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27660-27670} }
Gradient Short-Circuit: Efficient Out-of-Distribution Detection via Feature Intervention-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2025_ICCV, author = {Gu, Jiawei and Qiao, Ziyue and Li, Zechao}, title = {Gradient Short-Circuit: Efficient Out-of-Distribution Detection via Feature Intervention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {457-466} }
AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jiawei and Deng, Kai and Fan, Zexin and Wang, Shenlong and Xie, Jin and Yang, Jian}, title = {AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24770-24779} }
HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yusen and Zheng, Wenliang and Madasu, Aashrith and Shi, Peng and Kamoi, Ryo and Zhou, Hao and Zou, Zhuoyang and Zhao, Shu and Das, Sarkar Snigdha Sarathi and Gupta, Vipul and Lu, Xiaoxin and Zhang, Nan and Zhang, Ranran Haoran and Iyer, Avitej and Lou, Renze and Yin, Wenpeng and Zhang, Rui}, title = {HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22922-22933} }
Decoding Correlation-Induced Misalignment in the Stable Diffusion Workflow for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Tong_2025_ICCV, author = {Tong, Yunze and Zhang, Fengda and Zhu, Didi and Xiao, Jun and Kuang, Kun}, title = {Decoding Correlation-Induced Misalignment in the Stable Diffusion Workflow for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18187-18196} }
CityNav: A Large-Scale Dataset for Real-World Aerial Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jungdae and Miyanishi, Taiki and Kurita, Shuhei and Sakamoto, Koya and Azuma, Daichi and Matsuo, Yutaka and Inoue, Nakamasa}, title = {CityNav: A Large-Scale Dataset for Real-World Aerial Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5912-5922} }
Neuromanifold-Regularized KANs for Shape-fair Feature Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Arslan_2025_ICCV, author = {Arslan, Mazlum Ferhat and Guo, Weihong and Li, Shuo}, title = {Neuromanifold-Regularized KANs for Shape-fair Feature Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12790-12799} }
Diffusion Guided Adaptive Augmentation for Generalization in Visual Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jeong Woon and Hwang, Hyoseok}, title = {Diffusion Guided Adaptive Augmentation for Generalization in Visual Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {880-889} }
Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shou_2025_ICCV, author = {Shou, Yuntao and Cao, Xiangyong and Yan, Peiqiang and Hui, Qiao and Zhao, Qian and Meng, Deyu}, title = {Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19925-19935} }
Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2025_ICCV, author = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Wu, Aming and Deng, Cheng}, title = {Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16755-16764} }
FedAGC: Federated Continual Learning with Asymmetric Gradient Correction-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Chengchao and Shang, Fanhua and Liu, Hongying and Wan, Liang and Feng, Wei}, title = {FedAGC: Federated Continual Learning with Asymmetric Gradient Correction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3841-3850} }
Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kitichotkul_2025_ICCV, author = {Kitichotkul, Ruangrawee and Bharadwaj, Shashwath and Rapp, Joshua and Ma, Yanting and Mehta, Alexander and Goyal, Vivek K}, title = {Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25972-25982} }
Continuous-Time Human Motion Field from Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ziyun and Zhang, Ruijun and Liu, Zi-Yan and Wang, Yufu and Daniilidis, Kostas}, title = {Continuous-Time Human Motion Field from Event Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11502-11512} }
MagicMirror: ID-Preserved Video Generation in Video Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yuechen and Liu, Yaoyang and Xia, Bin and Peng, Bohao and Yan, Zexin and Lo, Eric and Jia, Jiaya}, title = {MagicMirror: ID-Preserved Video Generation in Video Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14464-14474} }
Unified Category-Level Object Detection and Pose Estimation from RGB Images using 3D Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fischer_2025_ICCV, author = {Fischer, Tom and Zhang, Xiaojie and Ilg, Eddy}, title = {Unified Category-Level Object Detection and Pose Estimation from RGB Images using 3D Prototypes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9790-9800} }
GT-Mean Loss: A Simple Yet Effective Solution for Brightness Mismatch in Low-Light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Jingxi and Hao, Shijie and Hong, Richang and Wang, Meng}, title = {GT-Mean Loss: A Simple Yet Effective Solution for Brightness Mismatch in Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6112-6121} }
OpenVision: A Fully-Open, Cost-Effective Family of Advanced Vision Encoders for Multimodal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xianhang and Liu, Yanqing and Tu, Haoqin and Xie, Cihang}, title = {OpenVision: A Fully-Open, Cost-Effective Family of Advanced Vision Encoders for Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3977-3987} }
Unified Multimodal Understanding via Byte-Pair Visual Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wanpeng and Feng, Yicheng and Luo, Hao and Li, Yijiang and Yue, Zihao and Zheng, Sipeng and Lu, Zongqing}, title = {Unified Multimodal Understanding via Byte-Pair Visual Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12976-12986} }
MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Hailong and Li, Ao and Zhang, Xiangtao and Liu, Zhe and Shi, Zenglin and Zhu, Ce and Zhang, Le}, title = {MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21949-21960} }
Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_ICCV, author = {Lou, Junyu and Zhao, Xiaorui and Shi, Kexuan and Gu, Shuhang}, title = {Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14095-14105} }
ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training-
[pdf]
[supp]
[bibtex]@InProceedings{Bruns_2025_ICCV, author = {Bruns, Leonard and Barroso-Laguna, Axel and Cavallari, Tommaso and Monszpart, Aron and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26751-26761} }
Everything is a Video: Unifying Modalities through Next-Frame Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hudson_2025_ICCV, author = {Hudson, G. Thomas and Slack, Dean and Winterbottom, Thomas and Sterling, Jamie and Xiao, Chenghao and Shentu, Junjie and Al Moubayed, Noura}, title = {Everything is a Video: Unifying Modalities through Next-Frame Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22004-22013} }
ViT-EnsembleAttack: Augmenting Ensemble Models for Stronger Adversarial Transferability in Vision Transformers-
[pdf]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Hanwen and Lu, Haobo and Wang, Xiaosen and He, Kun}, title = {ViT-EnsembleAttack: Augmenting Ensemble Models for Stronger Adversarial Transferability in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2000-2009} }
LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Yiren and Chen, Danze and Shou, Mike Zheng}, title = {LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19731-19741} }
TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Jiale and Wang, Wenhan and Li, Shikun and Qu, Xiaolei and Guo, Xin and Liu, Yizhong and Tang, Wenzhong and Lin, Xun and Zheng, Yefeng}, title = {TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24123-24134} }
Counting Stacked Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Dumery_2025_ICCV, author = {Dumery, Corentin and Ett\'e, Noa and Fan, Aoxiang and Li, Ren and Xu, Jingyi and Le, Hieu and Fua, Pascal}, title = {Counting Stacked Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19774-19783} }
VOccl3D: A Video Benchmark Dataset for 3D Human Pose and Shape Estimation under real Occlusions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2025_ICCV, author = {Garg, Yash and Bachu, Saketh and Dutta, Arindam and Lal, Rohit and Bose, Sarosij and Ta, Calvin-Khang and Asif, M. Salman and Roy-Chowdhury, Amit}, title = {VOccl3D: A Video Benchmark Dataset for 3D Human Pose and Shape Estimation under real Occlusions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7350-7360} }
PoseAnchor: Robust Root Position Estimation for 3D Human Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jun-Hee and Han, Jumin and Lee, Seong-Whan}, title = {PoseAnchor: Robust Root Position Estimation for 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7079-7088} }
Sparfels: Fast Reconstruction from Sparse Unposed Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jena_2025_ICCV, author = {Jena, Shubhendu and Ouasfi, Amine and Younes, Mae and Boukhayma, Adnane}, title = {Sparfels: Fast Reconstruction from Sparse Unposed Imagery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27476-27487} }
Unsupervised Visual Chain-of-Thought Reasoning via Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Kesen and Zhu, Beier and Sun, Qianru and Zhang, Hanwang}, title = {Unsupervised Visual Chain-of-Thought Reasoning via Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2303-2312} }
On the Complexity-Faithfulness Trade-off of Gradient-Based Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehrpanah_2025_ICCV, author = {Mehrpanah, Amir and Gamba, Matteo and Smith, Kevin and Azizpour, Hossein}, title = {On the Complexity-Faithfulness Trade-off of Gradient-Based Explanations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3531-3541} }
Adaptive Dual Uncertainty Optimization: Boosting Monocular 3D Object Detection under Test-Time Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Zixuan and Li, Dongxiao and Ma, Xinzhu and Tang, Shixiang and Li, Xiaotong and Yang, Wenhan and Duan, Ling-Yu}, title = {Adaptive Dual Uncertainty Optimization: Boosting Monocular 3D Object Detection under Test-Time Shifts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7273-7283} }
RayPose: Ray Bundling Diffusion for Template Views in Unseen 6D Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Junwen and Vutukur, Shishir Reddy and Yu, Peter KT and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin}, title = {RayPose: Ray Bundling Diffusion for Template Views in Unseen 6D Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9102-9112} }
DuoLoRA : Cycle-consistent and Rank-disentangled Content-Style Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roy_2025_ICCV, author = {Roy, Aniket and Borse, Shubhankar and Kadambi, Shreya and Das, Debasmit and Mahajan, Shweta and Garrepalli, Risheek and Park, Hyojin and Nayak, Ankita and Chellappa, Rama and Hayat, Munawar and Porikli, Fatih}, title = {DuoLoRA : Cycle-consistent and Rank-disentangled Content-Style Personalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15395-15404} }
F-Bench: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Lu and Duan, Huiyu and Hu, Qiang and Yang, Liu and Cai, Chunlei and Ye, Tianxiao and Liu, Huayu and Zhang, Xiaoyun and Zhai, Guangtao}, title = {F-Bench: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10982-10994} }
MikuDance: Animating Character Art with Mixed Motion Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jiaxu and Zeng, Xianfang and Chen, Xin and Zuo, Wei and Yu, Gang and Tu, Zhigang}, title = {MikuDance: Animating Character Art with Mixed Motion Dynamics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19689-19699} }
GaussRender: Learning 3D Occupancy with Gaussian Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chambon_2025_ICCV, author = {Chambon, Loick and Zablocki, Eloi and Boulch, Alexandre and Chen, Mickael and Cord, Matthieu}, title = {GaussRender: Learning 3D Occupancy with Gaussian Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27010-27020} }
UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Zhengyin and Yin, Hui and Liang, Min and Du, Qianqian and Yang, Ying and Huang, Hua}, title = {UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20346-20356} }
GFPack++: Attention-Driven Gradient Fields for Optimizing 2D Irregular Packing-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Tianyang and Lu, Lin and Liu, Yang and Wu, Mingdong and Dong, Hao and Zhang, Yanbin and Han, Renmin and Chen, Baoquan}, title = {GFPack++: Attention-Driven Gradient Fields for Optimizing 2D Irregular Packing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18014-18023} }
PLA: Prompt Learning Attack against Text-to-Image Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_ICCV, author = {Lyu, Xinqi and Liu, Yihao and Li, Yanjie and Xiao, Bin}, title = {PLA: Prompt Learning Attack against Text-to-Image Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16851-16860} }
HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teufel_2025_ICCV, author = {Teufel, Timo and Gera, Pulkit and Zhou, Xilong and Iqbal, Umar and Rao, Pramod and Kautz, Jan and Golyanik, Vladislav and Theobalt, Christian}, title = {HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29131-29141} }
Integrating Task-Specific and Universal Adapters for Pre-Trained Model-based Class-Incremental Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yan and Zhou, Da-Wei and Ye, Han-Jia}, title = {Integrating Task-Specific and Universal Adapters for Pre-Trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {806-816} }
RoboFactory: Exploring Embodied Agent Collaboration with Compositional Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Yiran and Kang, Li and Song, Xiufeng and Yin, Zhenfei and Liu, Xiaohong and Liu, Xihui and Zhang, Ruimao and Bai, Lei}, title = {RoboFactory: Exploring Embodied Agent Collaboration with Compositional Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10075-10085} }
Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Jixuan and Li, Wanhua and Han, Yifei and Dai, Tianru and Tang, Yansong}, title = {Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25250-25260} }
SEHDR: Single-Exposure HDR Novel View Synthesis via 3D Gaussian Bracketing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yiyu and Wang, Haoyuan and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {{SEHDR}: Single-Exposure {HDR} Novel View Synthesis via {3D} {Gaussian} Bracketing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26045--26054},
}
TriDi: Trilateral Diffusion of 3D Humans, Objects, and Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Petrov_2025_ICCV,
  author    = {Petrov, Ilya A. and Marin, Riccardo and Chibane, Julian and Pons-Moll, Gerard},
  title     = {{TriDi}: Trilateral Diffusion of {3D} Humans, Objects, and Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5523--5535},
}
GEOBench-VLM: Benchmarking Vision-Language Models for Geospatial Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Danish_2025_ICCV,
  author    = {Danish, Muhammad and Munir, Muhammad Akhtar and Shah, Syed Roshaan Ali and Kuckreja, Kartik and Khan, Fahad Shahbaz and Fraccaro, Paolo and Lacoste, Alexandre and Khan, Salman},
  title     = {{GEOBench-VLM}: Benchmarking Vision-Language Models for Geospatial Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7132--7142},
}
UPP: Unified Point-Level Prompting for Robust Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ai_2025_ICCV,
  author    = {Ai, Zixiang and Cui, Zhenyu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{UPP}: Unified Point-Level Prompting for Robust Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27359--27368},
}
HypDAE: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Lingxiao and Fan, Kaixuan and Gong, Boqing and Yue, Xiangyu},
  title     = {{HypDAE}: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17119--17128},
}
High-Resolution Spatiotemporal Modeling with Global-Local State Space Models for Video-Based Human Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Runyang and Chang, Hyung Jin and Tse, Tze Ho Elden and Kim, Boeun and Chang, Yi and Gao, Yixing},
  title     = {High-Resolution Spatiotemporal Modeling with Global-Local State Space Models for Video-Based Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8929--8938},
}
ShadowHack: Hacking Shadows via Luminance-Color Divide and Conquer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Jin and Li, Mingjia and Guo, Xiaojie},
  title     = {{ShadowHack}: Hacking Shadows via Luminance-Color Divide and Conquer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11403--11413},
}
REPA-E: Unlocking VAE for End-to-End Tuning of Latent Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Xingjian and Singh, Jaskirat and Hou, Yunzhong and Xing, Zhenchang and Xie, Saining and Zheng, Liang},
  title     = {{REPA-E}: Unlocking {VAE} for End-to-End Tuning of Latent Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18262--18272},
}
MOSAIC: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zhixuan and Zhu, Haokun and Chen, Rui and Francis, Jonathan and Hwang, Soonmin and Zhang, Ji and Oh, Jean},
  title     = {{MOSAIC}: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27456--27465},
}
RoBridge: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kaidong and Xu, Rongtao and Ren, Pengzhen and Lin, Junfan and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoBridge}: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14590--14601},
}
PersonaCraft: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware 3D-Conditioned Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Gwanghyun and Jeon, Suh Yoon and Lee, Seunggyu and Chun, Se Young},
  title     = {{PersonaCraft}: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware {3D}-Conditioned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12034--12044},
}
Controllable and Expressive One-Shot Video Head Swapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Chaonan and Qi, Jinwei and Zhang, Peng and Zhang, Bang and Bo, Liefeng},
  title     = {Controllable and Expressive One-Shot Video Head Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10239--10250},
}
SkySense V2: A Unified Foundation Model for Multi-modal Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yingying and Ru, Lixiang and Wu, Kang and Yu, Lei and Liang, Lei and Li, Yansheng and Chen, Jingdong},
  title     = {{SkySense} {V2}: A Unified Foundation Model for Multi-modal Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9136--9146},
}
Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuanhan and Chew, Yunice and Dong, Yuhao and Leo, Aria and Hu, Bo and Liu, Ziwei},
  title     = {Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20626--20636},
}
Intra-view and Inter-view Correlation Guided Multi-view Novel Class Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Xinhang and Liu, Jiyuan and Qu, Qian and Liu, Suyuan and Zhang, Chuyu and Wang, Fangdi and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Intra-view and Inter-view Correlation Guided Multi-view Novel Class Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4114--4124},
}
Decoupled Diffusion Sparks Adaptive Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yunsong and Ye, Naisheng and Ljungbergh, William and Li, Tianyu and Yang, Jiazhi and Yang, Zetong and Zhu, Hongzi and Petersson, Christoffer and Li, Hongyang},
  title     = {Decoupled Diffusion Sparks Adaptive Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27760--27770},
}
Unsupervised Joint Learning of Optical Flow and Intensity with Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Shuang and Hamann, Friedhelm and Gallego, Guillermo},
  title     = {Unsupervised Joint Learning of Optical Flow and Intensity with Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7980--7989},
}
Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Anshul_2025_ICCV,
  author    = {Anshul, Ashutosh and Gopal, Shreyas and Rajan, Deepu and Chng, Eng Siong},
  title     = {Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13826--13836},
}
YOLO-Count: Differentiable Object Counting for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Guanning and Zhang, Xiang and Wang, Zirui and Xu, Haiyang and Chen, Zeyuan and Li, Bingnan and Tu, Zhuowen},
  title     = {{YOLO-Count}: Differentiable Object Counting for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16765--16775},
}
PriorMotion: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Kangan and Miao, Jinyu and Jiao, Xinyu and Luo, Ziang and Fu, Zheng and Shi, Yining and Wang, Yunlong and Jiang, Kun and Yang, Diange},
  title     = {{PriorMotion}: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27284--27294},
}
Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Qi and Zhou, Xinze and Liu, Chen and Chen, Hao and Li, Wenxuan and Jiang, Zekun and Huang, Ziyan and Zhao, Yuxuan and Yu, Dexin and He, Junjun and Zheng, Yefeng and Shao, Ling and Yuille, Alan and Zhou, Zongwei},
  title     = {Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24001--24013},
}
DCT-Shield: A Robust Frequency Domain Defense against Malicious Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Bala_2025_ICCV,
  author    = {Bala, Aniruddha and Chowdhury, Rohit and Jaiswal, Rohan and Roheda, Siddharth},
  title     = {{DCT-Shield}: A Robust Frequency Domain Defense against Malicious Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18876--18884},
}
MobileViCLIP: An Efficient Video-Text Model for Mobile Devices-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Min and Jia, Zihan and Dai, Zhilin and Guo, Sheng and Wang, Limin},
  title     = {{MobileViCLIP}: An Efficient Video-Text Model for Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20824--20835},
}
SignRep: Enhancing Self-Supervised Sign Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Ryan and Camgoz, Necati Cihan and Bowden, Richard},
  title     = {{SignRep}: Enhancing Self-Supervised Sign Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22804--22814},
}
Towards Comprehensive Lecture Slides Understanding: Large-scale Dataset and Effective Method-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Enming and Li, Yuzhe and Liu, Yuliang and Zhu, Yingying and Bai, Xiang},
  title     = {Towards Comprehensive Lecture Slides Understanding: Large-scale Dataset and Effective Method},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4455--4464},
}
Visual Relation Diffusion for Human-Object Interaction Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Ping and Tang, Yepeng and Zhang, Chunjie and Zheng, Xiaolong and Liang, Chao and Wei, Yunchao and Zhao, Yao},
  title     = {Visual Relation Diffusion for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23551--23560},
}
Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Mingfang and Yonetani, Ryo and Huang, Yifei and Ouyang, Liangyang and Liu, Ruicong and Sato, Yoichi},
  title     = {Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27209--27219},
}
Hybrid-TTA: Continual Test-time Adaptation via Dynamic Domain Shift Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Hyewon and Park, Hyejin and Ko, Jueun and Min, Dongbo},
  title     = {{Hybrid-TTA}: Continual Test-time Adaptation via Dynamic Domain Shift Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2877--2886},
}
NavQ: Learning a Q-Model for Foresighted Vision-and-Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Peiran and Gong, Xicheng and Mu, Yadong},
  title     = {{NavQ}: Learning a {Q}-Model for Foresighted Vision-and-Language Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6327--6341},
}
PartField: Learning 3D Feature Fields for Part Segmentation and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Minghua and Uy, Mikaela Angelina and Xiang, Donglai and Su, Hao and Fidler, Sanja and Sharp, Nicholas and Gao, Jun},
  title     = {{PartField}: Learning {3D} Feature Fields for Part Segmentation and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9704--9715},
}
Anti-Tamper Protection for Unauthorized Individual Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zelin and Zong, Ruohan and Liu, Yifan and Yao, Ruichen and Liu, Yaokun and Zhang, Yang and Wang, Dong},
  title     = {Anti-Tamper Protection for Unauthorized Individual Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15501--15510},
}
EgoAdapt: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Biswas, Subrata and Nag, Sayan and Nagarajan, Tushar and Murdock, Calvin and Ananthabhotla, Ishwarya and Qian, Yijun and Ithapu, Vamsi Krishna and Manocha, Dinesh and Gao, Ruohan},
  title     = {{EgoAdapt}: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10741--10752},
}
EVT: Efficient View Transformation for Multi-Modal 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Yongjin and Jeong, Hyeon-Mun and Jeon, Yurim and Kim, Sanghyun},
  title     = {{EVT}: Efficient View Transformation for Multi-Modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26632--26642},
}
Mitigating Object Hallucinations via Sentence-Level Early Intervention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Shangpin and Yang, Senqiao and Jiang, Li and Tian, Zhuotao},
  title     = {Mitigating Object Hallucinations via Sentence-Level Early Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {635--646},
}
RobAVA: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Baoli and Wang, Ning and Ma, Xinzhu and Zou, Anqi and Lu, Yihang and Fan, Chuixuan and Wang, Zhihui and Lu, Kun and Wang, Zhiyong},
  title     = {{RobAVA}: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13985--13994},
}
DisenQ: Disentangling Q-Former for Activity-Biometrics-
[pdf]
[supp]
[bibtex]@InProceedings{Azad_2025_ICCV,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {{DisenQ}: Disentangling {Q-Former} for Activity-Biometrics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13502--13512},
}
Vision-Language Neural Graph Featurization for Extracting Retinal Lesions-
[pdf]
[bibtex]@InProceedings{Hassan_2025_ICCV,
  author    = {Hassan, Taimur and Sohail, Anabia and Naseer, Muzammal and Werghi, Naoufel},
  title     = {Vision-Language Neural Graph Featurization for Extracting Retinal Lesions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23700--23709},
}
PhysSplat: Efficient Physics Simulation for 3D Scenes via MLLM-Guided Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Haoyu and Wang, Hao and Zhao, Xingyue and Fei, Hao and Wang, Hongqiu and Long, Chengjiang and Zou, Hua},
  title     = {{PhysSplat}: Efficient Physics Simulation for {3D} Scenes via {MLLM}-Guided {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5242--5252},
}
Zero-Shot Depth Aware Image Editing with Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Parihar_2025_ICCV,
  author    = {Parihar, Rishubh and VS, Sachidanand and Babu, R. Venkatesh},
  title     = {Zero-Shot Depth Aware Image Editing with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15748--15759},
}
OrderChain: Towards General Instruct-Tuning for Stimulating the Ordinal Understanding Ability of MLLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jinhong and Tong, Shuo and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Xu, Hongxia and Chen, Danny Z. and Chen, Jintai and Wu, Jian},
  title     = {{OrderChain}: Towards General Instruct-Tuning for Stimulating the Ordinal Understanding Ability of {MLLM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3477--3487},
}
V2XPnP: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zewei and Xiang, Hao and Zheng, Zhaoliang and Zhao, Seth Z. and Lei, Mingyue and Zhang, Yun and Cai, Tianhui and Liu, Xinyi and Liu, Johnson and Bajji, Maheswari and Xia, Xin and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi},
  title     = {{V2XPnP}: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25399--25409},
}
Scaling Action Detection: AdaTAD++ with Transformer-Enhanced Temporal-Spatial Adaptation-
[pdf]
[bibtex]@InProceedings{Agrawal_2025_ICCV,
  author    = {Agrawal, Tanay and Ali, Abid and Dantcheva, Antitza and Bremond, Francois},
  title     = {Scaling Action Detection: {AdaTAD++} with Transformer-Enhanced Temporal-Spatial Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12222--12231},
}
Fix-CLIP: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bingchao and Ning, Zhiwei and Ding, Jianyu and Gao, Xuanang and Li, Yin and Jiang, Dongsheng and Yang, Jie and Liu, Wei},
  title     = {{Fix-CLIP}: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20694--20704},
}
MotionCtrl: A Real-time Controllable Vision-Language-Motion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Bin and Zheng, Sipeng and Wang, Ye and Xia, Lujie and Wei, Qianshan and Jin, Qin and Liu, Jing and Lu, Zongqing},
  title     = {{MotionCtrl}: A Real-time Controllable Vision-Language-Motion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12253--12262},
}
Benchmarking Multimodal Large Language Models Against Image Corruptions-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Xinkuan and Kan, Meina and Zhou, Yongbin and Shan, Shiguang},
  title     = {Benchmarking Multimodal Large Language Models Against Image Corruptions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9014--9023},
}
ETVA: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Kaisi and Lai, Zhengfeng and Sun, Yuchong and Zhang, Peng and Liu, Wei and Liu, Kieran and Cao, Meng and Song, Ruihua},
  title     = {{ETVA}: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21299--21309},
}
Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xin_2025_ICCV,
  author    = {Xin, Zewei and Li, Qinya and Niu, Chaoyue and Wu, Fan and Chen, Guihai},
  title     = {Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19482--19491},
}
Outdoor Monocular SLAM with Global Scale-Consistent 3D Gaussian Pointmaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Chong and Yu, Sicheng and Wang, Zijian and Zhou, Yifan and Wang, Hao},
  title     = {Outdoor Monocular {SLAM} with Global Scale-Consistent {3D} {Gaussian} Pointmaps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26035--26044},
}
FlowStyler: Artistic Video Stylization via Transformation Fields Transports-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Yuning and Chen, Jiaming and Ren, Xiaohua and Liao, Yuanjun and Zhang, Yanci},
  title     = {{FlowStyler}: Artistic Video Stylization via Transformation Fields Transports},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10229--10238},
}
7DGS: Unified Spatial-Temporal-Angular Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhongpai and Planche, Benjamin and Zheng, Meng and Choudhuri, Anwesa and Chen, Terrence and Wu, Ziyan},
  title     = {{7DGS}: Unified Spatial-Temporal-Angular {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26316--26325},
}
Robust and Efficient 3D Gaussian Splatting for Urban Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhensheng and Huang, Haozhi and Xiong, Zhen and Wang, Di and Yang, Guanghua},
  title     = {Robust and Efficient {3D} {Gaussian} Splatting for Urban Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26209--26219},
}
DWIM: Towards Tool-aware Visual Reasoning via Discrepancy-aware Workflow Generation & Instruct-Masking Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Fucai and G, Vijay Kumar B and Leng, Xingjian and Cai, Zhixi and Khan, Zaid and Wang, Weiqing and Haghighi, Pari Delir and Rezatofighi, Hamid and Chandraker, Manmohan},
  title     = {{DWIM}: Towards Tool-aware Visual Reasoning via Discrepancy-aware Workflow Generation \& Instruct-Masking Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3378--3389},
}
Iris: Breaking GUI Complexity with Adaptive Focus and Self-Refining-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Zhiqi and Li, Juncheng and Pang, Xinglei and Gao, Minghe and Pan, Kaihang and Lin, Wang and Fei, Hao and Zhang, Wenqiao and Tang, Siliang and Zhuang, Yueting},
  title     = {Iris: Breaking {GUI} Complexity with Adaptive Focus and Self-Refining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24559--24568},
}
Rethink Sparse Signals for Pose-guided Text-to-image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xuan_2025_ICCV,
  author    = {Xuan, Wenjie and Zhang, Jing and Liu, Juhua and Du, Bo and Tao, Dacheng},
  title     = {Rethink Sparse Signals for Pose-guided Text-to-image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15896--15906},
}
RoboTrom-Nav: A Unified Framework for Embodied Navigation Integrating Perception, Planning, and Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yufeng and Feng, Chengjian and Yan, Feng and Liu, Fanfan and Zheng, Liming and Ma, Lin},
  title     = {{RoboTrom-Nav}: A Unified Framework for Embodied Navigation Integrating Perception, Planning, and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6416--6425},
}
PatchScaler: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yong and Dong, Hang and Pan, Jinshan and Dong, Qingji and Chen, Kai and Zhang, Rongxiang and Fu, Lean and Wang, Fei},
  title     = {{PatchScaler}: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11283--11293},
}
RefEdit: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathiraja_2025_ICCV,
  author    = {Pathiraja, Bimsara and Patel, Maitreya and Singh, Shivam and Yang, Yezhou and Baral, Chitta},
  title     = {{RefEdit}: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15646--15656},
}
ETA: Energy-based Test-time Adaptation for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2025_ICCV,
  author    = {Chung, Younjoon and Park, Hyoungseob and Rim, Patrick and Zhang, Xiaoran and He, Jihe and Zeng, Ziyao and Cicek, Safa and Hong, Byung-Woo and Duncan, James S. and Wong, Alex},
  title     = {{ETA}: Energy-based Test-time Adaptation for Depth Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6001--6012},
}
Amodal3R: Amodal 3D Reconstruction from Occluded 2D Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Tianhao and Zheng, Chuanxia and Guan, Frank and Vedaldi, Andrea and Cham, Tat-Jen},
  title     = {{Amodal3R}: Amodal {3D} Reconstruction from Occluded {2D} Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9181--9193},
}
A Unified Framework for Motion Reasoning and Generation in Human Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Jeongeun and Choi, Sungjoon and Yun, Sangdoo},
  title     = {A Unified Framework for Motion Reasoning and Generation in Human Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10698--10707},
}
Dynamic Group Detection using VLM-augmented Temporal Groupness Graph-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yokoyama_2025_ICCV,
  author    = {Yokoyama, Kaname and Nakatani, Chihiro and Ukita, Norimichi},
  title     = {Dynamic Group Detection using {VLM}-augmented Temporal Groupness Graph},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10475--10484},
}
SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasson_2025_ICCV,
  author    = {Hasson, Yana and Luc, Pauline and Momeni, Liliane and Ovsjanikov, Maks and Le Moing, Guillaume and Kuznetsova, Alina and Ktena, Ira and Sun, Jennifer J. and Koppula, Skanda and Gokay, Dilara and Heyward, Joseph and Pot, Etienne and Zisserman, Andrew},
  title     = {{SciVid}: Cross-Domain Evaluation of Video Models in Scientific Applications},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21800--21811},
}
RoboPearls: Editable Video Simulation for Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Tang and Zhang, Likui and Wen, Youpeng and Zhang, Kaidong and Bian, Jia-Wang and Zhou, Xia and Yan, Tianyi and Zhan, Kun and Jia, Peng and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoboPearls}: Editable Video Simulation for Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10118--10129},
}
FreeMorph: Tuning-Free Generalized Image Morphing with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Yukang and Si, Chenyang and Wang, Jinghao and Liu, Ziwei},
  title     = {{FreeMorph}: Tuning-Free Generalized Image Morphing with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18111--18120},
}
ConstStyle: Robust Domain Generalization with Unified Style Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Nam Duong and Phuong, Nam Nguyen and Pham, Hieu H. and Le Nguyen, Phi and Thai, My T.},
  title     = {{ConstStyle}: Robust Domain Generalization with Unified Style Transformation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3174--3183},
}
Vision-Language Models Can't See the Obvious-
[pdf]
[bibtex]@InProceedings{Huynh_2025_ICCV,
  author    = {Huynh, Ngoc Dung and Le-Khac, Phuc H and Para, Wamiq Reyaz and Singh, Ankit and Narayan, Sanath},
  title     = {Vision-Language Models Can't See the Obvious},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24159--24169}
}
Perspective-Aware Teaching: Adapting Knowledge for Heterogeneous Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Jhe-Hao and Yao, Yi and Hsu, Chan-Feng and Xie, Hong-Xia and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {Perspective-Aware Teaching: Adapting Knowledge for Heterogeneous Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4178--4187}
}
GazeGaussian: High-Fidelity Gaze Redirection with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Chen, Peng and Li, Guangyu and Lu, Ming and Chen, Hui and Tian, Feng},
  title     = {{GazeGaussian}: High-Fidelity Gaze Redirection with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13293--13303}
}
Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xingyu and Jiang, Junjun and Wang, Chenyang and Jiang, Kui and Liu, Xianming and Ma, Jiayi},
  title     = {Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11262--11272}
}
Stroke2Sketch: Harnessing Stroke Attributes for Training-Free Sketch Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Rui and Li, Huining and Long, Yiyi and Wu, Xiaojun and He, Shengfeng},
  title     = {{Stroke2Sketch}: Harnessing Stroke Attributes for Training-Free Sketch Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16545--16554}
}
Mitigating Catastrophic Overfitting in Fast Adversarial Training via Label Information Elimination-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Chao and Tang, Ke and Li, Qing and Yao, Xin},
  title     = {Mitigating Catastrophic Overfitting in Fast Adversarial Training via Label Information Elimination},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2991--3000}
}
Pi-GPS: Enhancing Geometry Problem Solving by Unleashing the Power of Diagrammatic Information-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Junbo and Zhang, Ting and Sun, Jiayu and Tian, Mi and Huang, Hua},
  title     = {{Pi-GPS}: Enhancing Geometry Problem Solving by Unleashing the Power of Diagrammatic Information},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {1526--1536}
}
CountSE: Soft Exemplar Open-set Object Counting-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shuai and Zhang, Peng and Zhang, Shiwei and Ke, Wei},
  title     = {{CountSE}: Soft Exemplar Open-set Object Counting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21536--21546}
}
Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Gang and Zhang, Yifan and Qin, Yutao and Guo, Qiangya and Huang, Shuangping and Yan, Shuicheng},
  title     = {Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19054--19064}
}
Wasserstein Style Distribution Analysis and Transform for Stylized Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Xi and Gu, Xiang and Shi, Zhihao and Sun, Jian},
  title     = {Wasserstein Style Distribution Analysis and Transform for Stylized Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17496--17505}
}
PRE-Mamba: A 4D State Space Model for Ultra-High-Frequent Event Camera Deraining-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2025_ICCV,
  author    = {Ruan, Ciyu and Guo, Ruishan and Gong, Zihang and Xu, Jingao and Yang, Wenhan and Chen, Xinlei},
  title     = {{PRE-Mamba}: A {4D} State Space Model for Ultra-High-Frequent Event Camera Deraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9169--9180}
}
Future-Aware Interaction Network For Motion Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Shijie and Liu, Chunyu and Xu, Xun and Yeo, Si Yong and Yang, Xulei},
  title     = {Future-Aware Interaction Network For Motion Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7505--7515}
}
SuMa: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Kien and Tran, Anh and Pham, Cuong},
  title     = {{SuMa}: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19587--19596}
}
Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Haochen and Ren, Pengfei and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11252--11261}
}
Stochastic Interpolants for Revealing Stylistic Flows across the History of Art-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Pingchuan and Gui, Ming and Schusterbauer, Johannes and Yang, Xiaopei and Grebenkova, Olga and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {Stochastic Interpolants for Revealing Stylistic Flows across the History of Art},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5867--5878}
}
MIORe & VAR-MIORe: Benchmarks to Push the Boundaries of Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Ciubotariu_2025_ICCV,
  author    = {Ciubotariu, George and Zhou, Zhuyun and Wu, Zongwei and Timofte, Radu},
  title     = {{MIORe} \& {VAR-MIORe}: Benchmarks to Push the Boundaries of Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19784--19793}
}
Depth AnyEvent: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bartolomei_2025_ICCV,
  author    = {Bartolomei, Luca and Mannocci, Enrico and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {Depth {AnyEvent}: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19669--19678}
}
Thermal Polarimetric Multi-view Stereo-
[pdf]
[bibtex]@InProceedings{Kushida_2025_ICCV,
  author    = {Kushida, Takahiro and Tanaka, Kenichiro},
  title     = {Thermal Polarimetric Multi-view Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27390--27399}
}
MeshMamba: State Space Models for Articulated 3D Mesh Generation and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoshiyasu_2025_ICCV,
  author    = {Yoshiyasu, Yusuke and Sun, Leyuan and Sagawa, Ryusuke},
  title     = {{MeshMamba}: State Space Models for Articulated {3D} Mesh Generation and Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {6563--6574}
}
MH-LVC: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding-
[pdf]
[supp]
[bibtex]@InProceedings{Phung_2025_ICCV,
  author    = {Phung, Huu-Tai and Gao, Zong-Lin and Yao, Yi-Chen and Ho, Kuan-Wei and Chen, Yi-Hsin and Lin, Yu-Hsiang and Gnutti, Alessandro and Peng, Wen-Hsiao},
  title     = {{MH-LVC}: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19649--19658}
}
Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hosu_2025_ICCV,
  author    = {Hosu, Vlad and Agnolucci, Lorenzo and Iso, Daisuke and Saupe, Dietmar},
  title     = {Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12863--12872}
}
Temporal Rate Reduction Clustering for Human Motion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Xianghan and Tong, Zhengyu and Huang, Zhiyuan and Li, Chun-Guang},
  title     = {Temporal Rate Reduction Clustering for Human Motion Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14644--14654}
}
SparseVILA: Decoupling Visual Sparsity for Efficient VLM Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Khaki_2025_ICCV,
  author    = {Khaki, Samir and Guo, Junxian and Tang, Jiaming and Yang, Shang and Chen, Yukang and Plataniotis, Konstantinos N. and Lu, Yao and Han, Song and Liu, Zhijian},
  title     = {{SparseVILA}: Decoupling Visual Sparsity for Efficient {VLM} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23784--23794}
}
Trust but Verify: Programmatic VLM Evaluation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Prabhu_2025_ICCV,
  author    = {Prabhu, Viraj and Purushwalkam, Senthil and Yan, An and Xiong, Caiming and Xu, Ran},
  title     = {Trust but Verify: Programmatic {VLM} Evaluation in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3258--3267}
}
Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{She_2025_ICCV,
  author    = {She, Mengkun and Seegr{\"a}ber, Felix and Nakath, David and Sch{\"o}ntag, Patricia and K{\"o}ser, Kevin},
  title     = {Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29110--29119}
}
Embodied Representation Alignment with Mirror Neurons-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Wentao and Zhang, Zhining and Ren, Yuwei and Huang, Yin and Xu, Hao and Wang, Yizhou},
  title     = {Embodied Representation Alignment with Mirror Neurons},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11948--11957}
}
Wave-MambaAD: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qiao and Shao, Mingwen and Chen, Xinyuan and Lv, Xiang and Xu, Kai},
  title     = {{Wave-MambaAD}: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20868--20877}
}
MonoSOWA: Scalable Monocular 3D Object Detector Without Human Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Skvrna_2025_ICCV,
  author    = {Skvrna, Jan and Neumann, Lukas},
  title     = {{MonoSOWA}: Scalable Monocular {3D} Object Detector Without Human Annotations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7613--7623}
}
Measuring the Impact of Rotation Equivariance on Aerial Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Xiuyu and Wang, Xinhao and Zhu, Xiubin and Yang, Lan and Liu, Jiyuan and Hu, Xingchen},
  title     = {Measuring the Impact of Rotation Equivariance on Aerial Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7329--7339}
}
Inter2Former: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, You and Chen, Lichao and Ji, Jiayi and Cao, Liujuan and Zhang, Shengchuan and Ji, Rongrong},
  title     = {{Inter2Former}: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19816--19826}
}
LEGION: Learning to Ground and Explain for Synthetic Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Hengrui and Wen, Siwei and Wen, Zichen and Ye, Junyan and Li, Weijia and Feng, Peilin and Zhou, Baichuan and Wang, Bin and Lin, Dahua and Zhang, Linfeng and He, Conghui},
  title     = {{LEGION}: Learning to Ground and Explain for Synthetic Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18937--18947}
}
KDA: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding-
[pdf]
[bibtex]@InProceedings{Ran_2025_ICCV,
  author    = {Ran, Ran and Wei, Jiwei and He, Shiyuan and Ma, Zeyu and Zhang, Chaoning and Xie, Ning and Yang, Yang},
  title     = {{KDA}: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23311--23320}
}
NormalCrafter: Learning Temporally Consistent Normals from Video Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bin_2025_ICCV,
  author    = {Bin, Yanrui and Hu, Wenbo and Wang, Haoyuan and Chen, Xinya and Wang, Bing},
  title     = {{NormalCrafter}: Learning Temporally Consistent Normals from Video Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8330--8339}
}
TrackAny3D: Transferring Pretrained 3D Models for Category-unified 3D Point Cloud Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengmeng and Wang, Haonan and Li, Yulong and Kong, Xiangjie and Du, Jiaxin and Shen, Guojiang and Xia, Feng},
  title     = {{TrackAny3D}: Transferring Pretrained {3D} Models for Category-unified {3D} Point Cloud Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28249--28259}
}
Color Matching Using Hypernetwork-Based Kolmogorov-Arnold Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nikonorov_2025_ICCV,
  author    = {Nikonorov, Artem and Perevozchikov, Georgy and Korepanov, Andrei and Mehta, Nancy and Afifi, Mahmoud and Ershov, Egor and Timofte, Radu},
  title     = {Color Matching Using Hypernetwork-Based {Kolmogorov-Arnold} Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7099--7109}
}
S2M2: Scalable Stereo Matching Model for Reliable Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2025_ICCV,
  author    = {Min, Junhong and Jeon, Youngpil and Kim, Jimin and Choi, Minyong},
  title     = {{S2M2}: Scalable Stereo Matching Model for Reliable Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26729--26739}
}
NETracer: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chao and Jiang, Yangbo and Zheng, Nenggan},
  title     = {{NETracer}: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20593--20602}
}
MGSfM: Multi-Camera Geometry Driven Global Structure-from-Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Peilin and Cui, Hainan and Tu, Diantao and Shen, Shuhan},
  title     = {{MGSfM}: Multi-Camera Geometry Driven Global Structure-from-Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {5232--5241}
}
FaceLift: Learning Generalizable Single Image 3D Face Reconstruction from Synthetic Heads-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Weijie and Zhou, Yi and Yang, Ming-Hsuan and Shu, Zhixin},
  title     = {{FaceLift}: Learning Generalizable Single Image {3D} Face Reconstruction from Synthetic Heads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12691--12701}
}
Towards Open-World Generation of Stereo Images and Unsupervised Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiao_2025_ICCV,
  author    = {Qiao, Feng and Xiong, Zhexiao and Xing, Eric and Jacobs, Nathan},
  title     = {Towards Open-World Generation of Stereo Images and Unsupervised Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26579--26589}
}
PolarAnything: Diffusion-based Polarimetric Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kailong and Lyu, Youwei and Guo, Heng and Li, Si and Ma, Zhanyu and Shi, Boxin},
  title     = {{PolarAnything}: Diffusion-based Polarimetric Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26466--26476}
}
What's Making That Sound Right Now? Video-centric Audio-Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Hahyeon and Lee, Junhoo and Kwak, Nojun},
  title     = {What's Making That Sound Right Now? Video-centric Audio-Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20095--20104}
}
WonderPlay: Dynamic 3D Scene Generation from a Single Image and Actions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zizhang and Yu, Hong-Xing and Liu, Wei and Yang, Yin and Herrmann, Charles and Wetzstein, Gordon and Wu, Jiajun},
  title     = {{WonderPlay}: Dynamic {3D} Scene Generation from a Single Image and Actions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9080--9090}
}
SRefiner: Soft-Braid Attention for Multi-Agent Trajectory Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Liwen and Pan, Zhiyu and Wang, Zhicheng and Cao, Zhiguo and Li, Wei},
  title     = {{SRefiner}: Soft-Braid Attention for Multi-Agent Trajectory Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {960--969}
}
Scheduling Weight Transitions for Quantization-Aware Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Junghyup and Jeon, Jeimin and Kim, Dohyung and Ham, Bumsub},
  title     = {Scheduling Weight Transitions for Quantization-Aware Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23466--23475}
}
Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Kuang_2025_ICCV,
  author    = {Kuang, Haowei and Yang, Wenhan and Guo, Zongming and Liu, Jiaying},
  title     = {Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16514--16523}
}
Efficient Spiking Point Mamba for Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Peixi and Chai, Bosong and Zheng, Menghua and Li, Wei and Hu, Zhangchi and Chen, Jie and Zhang, Zheyu and Li, Hebei and Sun, Xiaoyan},
  title     = {Efficient Spiking Point {Mamba} for Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26393--26403}
}
VideoVAE+: Large Motion Video Autoencoding with Cross-modal Video VAE-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2025_ICCV,
  author    = {Xing, Yazhou and Fei, Yang and He, Yingqing and Chen, Jingye and Xie, Jiaxin and Chi, Xiaowei and Chen, Qifeng},
  title     = {{VideoVAE+}: Large Motion Video Autoencoding with Cross-modal Video {VAE}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17951--17960}
}
Learning 3D Object Spatial Relationships from Pre-trained 2D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baik_2025_ICCV,
  author    = {Baik, Sangwon and Kim, Hyeonwoo and Joo, Hanbyul},
  title     = {Learning {3D} Object Spatial Relationships from Pre-trained {2D} Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8418--8428}
}
Event-guided Unified Framework for Low-light Video Enhancement, Frame Interpolation, and Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Taewoo and Yoon, Kuk-Jin},
  title     = {Event-guided Unified Framework for Low-light Video Enhancement, Frame Interpolation, and Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8524--8534}
}
FE-CLIP: Frequency Enhanced CLIP Model for Zero-Shot Anomaly Detection and Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Tao and Chu, Qi and Liu, Bin and Zhou, Wei and Yu, Nenghai},
  title     = {{FE-CLIP}: Frequency Enhanced {CLIP} Model for Zero-Shot Anomaly Detection and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21220--21230}
}
MotionDiff: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Yikun and Li, Yiqing and Wu, Jiawei and Luo, Xing and Jin, Zhi},
  title     = {{MotionDiff}: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14475--14485}
}
Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Shuchang and Naseem, Usman and Meng, Mingyuan and Kim, Jinman},
  title     = {Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22316--22326}
}
CoTMR: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zelong and Jing, Dong and Lu, Zhiwu},
  title     = {{CoTMR}: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22675--22684}
}
PASTA: Part-Aware Sketch-to-3D Shape Generation with Text-Aligned Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunggwan and Jung, Hwanhee and Koh, Byoungsoo and Huang, Qixing and Yoon, Sang Ho and Kim, Sangpil},
  title     = {{PASTA}: Part-Aware Sketch-to-{3D} Shape Generation with Text-Aligned Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18585--18595}
}
BASIC: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jianting and Wang, Yubo and Cao, Haoyu and Xu, Linli},
  title     = {{BASIC}: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20582--20592}
}
Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xiuyu and Tan, Shuhan and Kr{\"a}henb{\"u}hl, Philipp},
  title     = {Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25305--25314}
}
VMBench: A Benchmark for Perception-Aligned Video Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Xinran and Zhu, Chen and Wu, Meiqi and Li, Hangyu and Feng, Xiaokun and Yang, Cundian and Hao, Aiming and Zhu, Jiashu and Wu, Jiahong and Chu, Xiangxiang},
  title     = {{VMBench}: A Benchmark for Perception-Aligned Video Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13087--13098}
}
Physics Context Builders: A Modular Framework for Physical Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Balazadeh_2025_ICCV,
  author    = {Balazadeh, Vahid and Ataei, Mohammadmehdi and Cheong, Hyunmin and Khasahmadi, Amir Hosein and Krishnan, Rahul G.},
  title     = {Physics Context Builders: A Modular Framework for Physical Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {7318--7328}
}
RhythmGuassian: Repurposing Generalizable Gaussian Model For Remote Physiological Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Hao and Zhang, Yuting and Tang, Jiaqi and Fu, Bowen and Ge, Wenhang and Wei, Wei and Wu, Kaishun and Chen, Yingcong},
  title     = {{RhythmGuassian}: Repurposing Generalizable {Gaussian} Model For Remote Physiological Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20780--20790}
}
Bridging the Sky and Ground: Towards View-Invariant Feature Learning for Aerial-Ground Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Khalid_2025_ICCV,
  author    = {Khalid, Wajahat and Liu, Bin and Li, Xulin and Waqas, Muhammad and Afgan, Muhammad Sher},
  title     = {Bridging the Sky and Ground: Towards View-Invariant Feature Learning for Aerial-Ground Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9749--9758}
}
Prototype Guided Backdoor Defense via Activation Space Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Amula_2025_ICCV,
  author    = {Amula, Venkat Adithya and Samavedam, Sunayana and Saini, Saurabh and Gupta, Avani and Narayanan, P J},
  title     = {Prototype Guided Backdoor Defense via Activation Space Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {2195--2205}
}
EvolvingGrasp: Evolutionary Grasp Generation via Efficient Preference Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yufei and Zhong, Yiming and Yang, Zemin and Cong, Peishan and Yu, Jingyi and Zhu, Xinge and Ma, Yuexin},
  title     = {{EvolvingGrasp}: Evolutionary Grasp Generation via Efficient Preference Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11665--11674}
}
HyTIP: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yi-Hsin and Yao, Yi-Chen and Ho, Kuan-Wei and Wu, Chun-Hung and Phung, Huu-Tai and Benjak, Martin and Ostermann, J{\"o}rn and Peng, Wen-Hsiao},
  title     = {{HyTIP}: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17889--17898}
}
Achieving More with Less: Additive Prompt Tuning for Rehearsal-Free Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Haoran and Wang, Ping and Zhou, Zihan and Zhang, Xu and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {Achieving More with Less: Additive Prompt Tuning for Rehearsal-Free Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {340--349}
}
PromptDresser: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongho and Jin, Hoiyeong and Park, Sunghyun and Choo, Jaegul},
  title     = {{PromptDresser}: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16026--16036}
}
VLRMBench: A Comprehensive and Challenging Benchmark for Vision-Language Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruan_2025_ICCV,
  author    = {Ruan, Jiacheng and Yuan, Wenzhen and Gao, Xian and Guo, Ye and Zhang, Daoxin and Xu, Zhe and Hu, Yao and Liu, Ting and Fu, Yuzhuo},
  title     = {{VLRMBench}: A Comprehensive and Challenging Benchmark for Vision-Language Reward Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {3163--3173}
}
SITE: towards Spatial Intelligence Thorough Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenqi and Tan, Reuben and Zhu, Pengyue and Yang, Jianwei and Yang, Zhengyuan and Wang, Lijuan and Kolobov, Andrey and Gao, Jianfeng and Gong, Boqing},
  title     = {{SITE}: towards Spatial Intelligence Thorough Evaluation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9058--9069},
}
DASH: Detection and Assessment of Systematic Hallucinations of VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Augustin_2025_ICCV,
  author    = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias},
  title     = {{DASH}: Detection and Assessment of Systematic Hallucinations of {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22748--22759},
}
FW-Merging: Scaling Model Merging with Frank-Wolfe Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Hao Mark and Hu, Shell Xu and Luk, Wayne and Hospedales, Timothy and Fan, Hongxiang},
  title     = {{FW-Merging}: Scaling Model Merging with {Frank-Wolfe} Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3390--3400},
}
On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Bo and Lu, Jie and Zhang, Guangquan and Fang, Zhen},
  title     = {On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19805--19815},
}
ZipVL: Accelerating Vision-Language Models through Dynamic Token Sparsity-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Yefei and Chen, Feng and Liu, Jing and Shao, Wenqi and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan},
  title     = {{ZipVL}: Accelerating Vision-Language Models through Dynamic Token Sparsity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20477--20486},
}
RayletDF: Raylet Distance Fields for Generalizable 3D Surface Reconstruction from Point Clouds or Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shenxing and Li, Jinxi and Yang, Yafei and Zhou, Siyuan and Yang, Bo},
  title     = {{RayletDF}: Raylet Distance Fields for Generalizable {3D} Surface Reconstruction from Point Clouds or {Gaussians}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25616--25626},
}
VOVTrack: Exploring the Potentiality in Raw Videos for Open-Vocabulary Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zekun and Han, Ruize and Hou, Junhui and Song, Linqi and Feng, Wei},
  title     = {{VOVTrack}: Exploring the Potentiality in Raw Videos for Open-Vocabulary Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7472--7482},
}
Planar Affine Rectification from Local Change of Scale and Orientation-
[pdf]
[supp]
[bibtex]@InProceedings{Nissan_2025_ICCV,
  author    = {Nissan, Yuval and Pollefeys, Marc and Barath, Daniel},
  title     = {Planar Affine Rectification from Local Change of Scale and Orientation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27147--27155},
}
Grouped Speculative Decoding for Autoregressive Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{So_2025_ICCV,
  author    = {So, Junhyuk and Shin, Juncheol and Kook, Hyunho and Park, Eunhyeok},
  title     = {Grouped Speculative Decoding for Autoregressive Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15375--15384},
}
You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Si_2025_ICCV,
  author    = {Si, Hao and Javanmardi, Ehsan and Tsukada, Manabu},
  title     = {You Share Beliefs, {I} Adapt: Progressive Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27521--27530},
}
Music-Aligned Holistic 3D Dance Generation via Hierarchical Motion Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaojie and Li, Ronghui and Fang, Shukai and Xie, Shuzhao and Guo, Xiaoyang and Zhou, Jiaqing and Peng, Junkun and Wang, Zhi},
  title     = {Music-Aligned Holistic {3D} Dance Generation via Hierarchical Motion Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14420--14430},
}
Advancing Visual Large Language Model for Multi-granular Versatile Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Wentao and Tan, Haoxian and Zhong, Yujie and Wei, Cong and Li, Dengjie and Yang, Yujiu},
  title     = {Advancing Visual Large Language Model for Multi-granular Versatile Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22153--22164},
}
Neural Solver of Dichromatic Reflection Model for Specular Highlight Removal-
[pdf]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Gang},
  title     = {Neural Solver of Dichromatic Reflection Model for Specular Highlight Removal},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7241--7250},
}
MatchDiffusion: Training-free Generation of Match-Cuts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pardo_2025_ICCV,
  author    = {Pardo, Alejandro and Pizzati, Fabio and Zhang, Tong and Pondaven, Alexander and Torr, Philip and Perez, Juan Camilo and Ghanem, Bernard},
  title     = {{MatchDiffusion}: Training-free Generation of Match-Cuts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14973--14982},
}
STD-GS: Exploring Frame-Event Interaction for SpatioTemporal-Disentangled Gaussian Splatting to Reconstruct High-Dynamic Scene-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Yan, Luxin and Lee, Gim Hee},
  title     = {{STD-GS}: Exploring Frame-Event Interaction for {SpatioTemporal-Disentangled} {Gaussian} Splatting to Reconstruct High-Dynamic Scene},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24801--24810},
}
C2MIL: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Cen_2025_ICCV,
  author    = {Cen, Min and Zhuang, Zhenfeng and Zhang, Yuzhe and Zeng, Min and Magnier, Baptiste and Yu, Lequan and Zhang, Hong and Wang, Liansheng},
  title     = {{C2MIL}: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24392--24401},
}
Weakly-Supervised Learning of Dense Functional Correspondences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stojanov_2025_ICCV,
  author    = {Stojanov, Stefan and Zhao, Linan and Zhang, Yunzhi and Yamins, Daniel L. K. and Wu, Jiajun},
  title     = {Weakly-Supervised Learning of Dense Functional Correspondences},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6981--6993},
}
Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Yanwen and Jia, Wenqi and Cao, Xu and Jiang, Peng-Tao and Li, Guodong and Chen, Jintai},
  title     = {Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13912--13921},
}
ReFlex: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jimyeong and Park, Jungwon and Song, Yeji and Kwak, Nojun and Rhee, Wonjong},
  title     = {{ReFlex}: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15939--15948},
}
Co-Painter: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Bowen and Wei, Wei and Tang, Jiaqi and Nie, Jiangtao and Ye, Yanyu and Xu, Xiaogang and Chen, Ying-Cong and Zhang, Lei},
  title     = {{Co-Painter}: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16830--16839},
}
Object-level Correlation for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Chunlin and Zhang, Yu and Fan, Jie and Zhu, Hongyuan and Wei, Xiu-Shen and Wang, Yijun and Kou, Zhiqiang and Sun, Shuzhou},
  title     = {Object-level Correlation for Few-Shot Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23689--23699},
}
Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Shuang and Zhao, Zixiang and Bai, Haowen and Yu, Chang and Peng, Jiangjun and Cao, Xiangyong and Meng, Deyu},
  title     = {Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12002--12011},
}
SAMO: A Lightweight Sharpness-Aware Approach for Multi-Task Optimization with Joint Global-Local Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ban_2025_ICCV,
  author    = {Ban, Hao and Subramani, Gokul Ram and Ji, Kaiyi},
  title     = {{SAMO}: A Lightweight Sharpness-Aware Approach for Multi-Task Optimization with Joint Global-Local Perturbation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {785--795},
}
RoboTron-Mani: All-in-One Multimodal Large Model for Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Feng and Liu, Fanfan and Huang, Yiyang and Guan, Zechao and Zheng, Liming and Zhong, Yufeng and Feng, Chengjian and Ma, Lin},
  title     = {{RoboTron-Mani}: All-in-One Multimodal Large Model for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13707--13718},
}
FastJSMA: Accelerating Jacobian-based Saliency Map Attacks through Gradient Decoupling-
[pdf]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhenghao and Xu, Shengjie and Li, Zijing and Chen, Meixi and Yu, Chaojian and Shao, Yuanjie and Gao, Changxin},
  title     = {{FastJSMA}: Accelerating {Jacobian}-based Saliency Map Attacks through Gradient Decoupling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1506--1515},
}
TAR3D: Creating High-Quality 3D Assets via Next-Part Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xuying and Liu, Yutong and Li, Yangguang and Zhang, Renrui and Liu, Yufei and Wang, Kai and Ouyang, Wanli and Xiong, Zhiwei and Gao, Peng and Hou, Qibin and Cheng, Ming-Ming},
  title     = {{TAR3D}: Creating High-Quality {3D} Assets via Next-Part Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5134--5145},
}
CVFusion: Cross-View Fusion of 4D Radar and Camera for 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Hanzhi and Xiang, Zhiyu and Xu, Ruoyu and Fu, Jingyun and Xu, Peng and Wang, Shaohong and Yang, Zhihao and Pu, Tianyu and Liu, Eryun},
  title     = {{CVFusion}: Cross-View Fusion of {4D} Radar and Camera for {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28188--28197},
}
Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2025_ICCV,
  author    = {Zhai, Shengfang and Li, Jiajun and Liu, Yue and Chen, Huanran and Tian, Zhihua and Qu, Wenjie and Shen, Qingni and Jia, Ruoxi and Dong, Yinpeng and Zhang, Jiaheng},
  title     = {Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15182--15193},
}
MoMa-Kitchen: A 100K+ Benchmark for Affordance-Grounded Last-Mile Navigation in Mobile Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Pingrui and Gao, Xianqiang and Wu, Yuhan and Liu, Kehui and Wang, Dong and Wang, Zhigang and Zhao, Bin and Ding, Yan and Li, Xuelong},
  title     = {{MoMa-Kitchen}: A {100K+} Benchmark for Affordance-Grounded Last-Mile Navigation in Mobile Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6315--6326},
}
ViT-Linearizer: Distilling Quadratic Knowledge into Linear-Time Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Guoyizhe and Chellappa, Rama},
  title     = {{ViT-Linearizer}: Distilling Quadratic Knowledge into Linear-Time Vision Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20737--20747},
}
RobustSplat: Decoupling Densification and Dynamics for Transient-Free 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Chuanyu and Zhang, Yuqi and Yao, Kunbin and Chen, Guanying and Xiong, Yuan and Huang, Chuan and Cui, Shuguang and Cao, Xiaochun},
  title     = {{RobustSplat}: Decoupling Densification and Dynamics for Transient-Free {3DGS}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27126--27136},
}
Generalized Deep Multi-view Clustering via Causal Learning with Partially Aligned Cross-view Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xihong and Wang, Siwei and Jin, Jiaqi and Wang, Fangdi and Liu, Tianrui and Jin, Yueming and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Generalized Deep Multi-view Clustering via Causal Learning with Partially Aligned Cross-view Correspondence},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1990--1999},
}
MeasureXpert: Automatic Anthropometric Measurement Extraction from Two Unregistered, Partial, Posed, and Dressed Body Scans-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Ran and Dai, Xinxin and Hu, Pengpeng and Palade, Vasile and Munteanu, Adrian},
  title     = {{MeasureXpert}: Automatic Anthropometric Measurement Extraction from Two Unregistered, Partial, Posed, and Dressed Body Scans},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9605--9615},
}
FedMeNF: Privacy-Preserving Federated Meta-Learning for Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2025_ICCV,
  author    = {Yun, Junhyeog and Hong, Minui and Kim, Gunhee},
  title     = {{FedMeNF}: Privacy-Preserving Federated Meta-Learning for Neural Fields},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2161--2171},
}
MeshLLM: Empowering Large Language Models to Progressively Understand and Generate 3D Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shuangkang and Shen, I-Chao and Wang, Yufeng and Tsai, Yi-Hsuan and Yang, Yi and Zhou, Shuchang and Ding, Wenrui and Igarashi, Takeo and Yang, Ming-Hsuan},
  title     = {{MeshLLM}: Empowering Large Language Models to Progressively Understand and Generate {3D} Mesh},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14061--14072},
}
Split-and-Combine: Enhancing Style Augmentation for Single Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhen and Yang, Shuai and Dang, Qianlong and Wu, Zhize and Gu, Lichuan},
  title     = {{Split-and-Combine}: Enhancing Style Augmentation for Single Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15616--15625},
}
FOLDER: Accelerating Multi-Modal Large Language Models with Enhanced Performance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haicheng and Yu, Zhemeng and Spadaro, Gabriele and Ju, Chen and Qu{\'e}tu, Victor and Xiao, Shuai and Tartaglione, Enzo},
  title     = {{FOLDER}: Accelerating Multi-Modal Large Language Models with Enhanced Performance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23614--23625},
}
A Hyperdimensional One Place Signature to Represent Them All: Stackable Descriptors For Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Malone_2025_ICCV,
  author    = {Malone, Connor and Hussaini, Somayeh and Fischer, Tobias and Milford, Michael},
  title     = {A Hyperdimensional One Place Signature to Represent Them All: Stackable Descriptors For Visual Place Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9822--9833},
}
Large Multi-modal Models Can Interpret Features in Large Multi-modal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kaichen and Shen, Yifei and Li, Bo and Liu, Ziwei},
  title     = {Large Multi-modal Models Can Interpret Features in Large Multi-modal Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3650--3661},
}
Progressive Artwork Outpainting via Latent Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV,
  author    = {Song, Dae-Young and Yu, Jung-Jae and Cho, Donghyeon},
  title     = {Progressive Artwork Outpainting via Latent Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15405--15415},
}
Are VLMs Ready for Autonomous Driving? An Empirical Study from the Reliability, Data and Metric Perspectives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Shaoyuan and Kong, Lingdong and Dong, Yuhao and Sima, Chonghao and Zhang, Wenwei and Chen, Qi Alfred and Liu, Ziwei and Pan, Liang},
  title     = {Are {VLMs} Ready for Autonomous Driving? An Empirical Study from the Reliability, Data and Metric Perspectives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6585--6597},
}
Learn2Synth: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xiaoling and Zeng, Xiangrui and Puonti, Oula and Iglesias, Juan Eugenio and Fischl, Bruce and Balbastre, Ya{\"e}l},
  title     = {{Learn2Synth}: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20368--20378},
}
Deep Incomplete Multi-view Clustering with Distribution Dual-Consistency Recovery Guidance-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Jiaqi and Wang, Siwei and Dong, Zhibin and Yang, Xihong and Liu, Xinwang and Zhu, En and He, Kunlun},
  title     = {Deep Incomplete Multi-view Clustering with Distribution Dual-Consistency Recovery Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1016--1026},
}
3D Mesh Editing using Masked LRMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Will and Wang, Dilin and Fan, Yuchen and Bozic, Aljaz and Stuyck, Tuur and Li, Zhengqin and Dong, Zhao and Ranjan, Rakesh and Sarafianos, Nikolaos},
  title     = {{3D} Mesh Editing using Masked {LRMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7154--7165},
}
Acknowledging Focus Ambiguity in Visual Questions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chongyan and Tseng, Yu-Yun and Li, Zhuoheng and Venkatesh, Anush and Gurari, Danna},
  title     = {Acknowledging Focus Ambiguity in Visual Questions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1228--1238},
}
DRaM-LHM: A Quaternion Framework for Iterative Camera Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chen and Du, Weizhi and Min, Zhixiang and She, Baochen and Dunn, Enrique and Hanson, Sonya M.},
  title     = {{DRaM-LHM}: A Quaternion Framework for Iterative Camera Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6447--6455},
}
Bolt3D: Generating 3D Scenes in Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szymanowicz_2025_ICCV,
  author    = {Szymanowicz, Stanislaw and Zhang, Jason Y. and Srinivasan, Pratul and Gao, Ruiqi and Brussee, Arthur and Holynski, Aleksander and Martin-Brualla, Ricardo and Barron, Jonathan T. and Henzler, Philipp},
  title     = {{Bolt3D}: Generating {3D} Scenes in Seconds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24846--24857},
}
Revisiting Image Fusion for Multi-Illuminant White-Balance Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Serrano-Lozano_2025_ICCV,
  author    = {Serrano-Lozano, David and Arora, Aditya and Herranz, Luis and Derpanis, Konstantinos G. and Brown, Michael S. and Vazquez-Corral, Javier},
  title     = {Revisiting Image Fusion for Multi-Illuminant White-Balance Correction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8275--8284},
}
Combinative Matching for Geometric Shape Assembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Nahyuk and Min, Juhong and Lee, Junhong and Park, Chunghyun and Cho, Minsu},
  title     = {Combinative Matching for Geometric Shape Assembly},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9540--9549},
}
Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hanna_2025_ICCV,
  author    = {Hanna, Jo{\"e}lle and Borth, Damian},
  title     = {Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23763--23772},
}
DAViD: Data-efficient and Accurate Vision Models from Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saleh_2025_ICCV,
  author    = {Saleh, Fatemeh and Aliakbarian, Sadegh and Hewitt, Charlie and Petikam, Lohit and Xiao, Xian and Criminisi, Antonio and Cashman, Thomas J. and Baltrusaitis, Tadas},
  title     = {{DAViD}: Data-efficient and Accurate Vision Models from Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5348--5358},
}
Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Xu and Lyu, Yuanhuiyi and Jiang, Lutao and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming},
  title     = {Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21166--21176},
}
ETA: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamdan_2025_ICCV,
  author    = {Hamdan, Shadi and Sima, Chonghao and Yang, Zetong and Li, Hongyang and Guney, Fatma},
  title     = {{ETA}: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26529--26538},
}
SCORE: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Shiqi and He, Shuting and Qin, Huaiyuan and Wen, Bihan},
  title     = {{SCORE}: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12559--12569},
}
LEGO-Maker: A Semantic-Driven Algorithm for Text-to-3D Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yifei and Chen, Lei},
  title     = {{LEGO-Maker}: A Semantic-Driven Algorithm for Text-to-{3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15127--15136},
}
Learning Interpretable Queries for Explainable Image Classification with Information Pursuit-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kolek_2025_ICCV,
  author    = {Kolek, Stefan and Chattopadhyay, Aditya and Chan, Kwan Ho Ryan and Andrade-Loarca, Hector and Kutyniok, Gitta and Vidal, Ren{\'e}},
  title     = {Learning Interpretable Queries for Explainable Image Classification with Information Pursuit},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3947--3956},
}
LoRA-FAIR: Federated LoRA Fine-Tuning with Aggregation and Initialization Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Jieming and Wang, Lei and Zhang, Letian and Xu, Jie},
  title     = {{LoRA-FAIR}: Federated {LoRA} Fine-Tuning with Aggregation and Initialization Refinement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3737--3746},
}
DiffDoctor: Diagnosing Image Diffusion Models Before Treating-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Ji, Sihui and Liu, Yu and Shen, Yujun and Zhao, Hengshuang},
  title     = {{DiffDoctor}: Diagnosing Image Diffusion Models Before Treating},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18917--18926},
}
AIRA: Activation-Informed Low-Rank Adaptation for Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Lujun and Li, Dezhi and Lin, Cheng and Li, Wei and Xue, Wei and Han, Sirui and Guo, Yike},
  title     = {{AIRA}: Activation-Informed Low-Rank Adaptation for Large Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1729--1739},
}
Understanding Personal Concept in Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Sunghyun and Lee, Jungsoo and Borse, Shubhankar and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih},
  title     = {Understanding Personal Concept in Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19957--19966},
}
4D Visual Pre-training for Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Chengkai and Ze, Yanjie and Fu, Yankai and Gao, Zeyu and Hu, Songbo and Yu, Yue and Zhang, Shanghang and Xu, Huazhe},
  title     = {{4D} Visual Pre-training for Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8451--8461},
}
SemTalk: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiangyue and Li, Jianfang and Zhang, Jiaxu and Dang, Ziqiang and Ren, Jianqiang and Bo, Liefeng and Tu, Zhigang},
  title     = {{SemTalk}: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13761--13771},
}
Textured 3D Regenerative Morphing with 3D Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Songlin and Lan, Yushi and Chen, Honghua and Pan, Xingang},
  title     = {Textured {3D} Regenerative Morphing with {3D} Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15159--15170},
}
LONG3R: Long Sequence Streaming 3D Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhuoguang and Qin, Minghui and Yuan, Tianyuan and Liu, Zhe and Zhao, Hang},
  title     = {{LONG3R}: Long Sequence Streaming {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5273--5284},
}
DreamActor-M1: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yuxuan and Rong, Zhengkun and Wang, Lizhen and Zhang, Longhao and Hu, Tianshu},
  title     = {{DreamActor-M1}: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11036--11046},
}
Blind2Sound: Self-Supervised Image Denoising without Residual Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jiazheng and Wang, Zejin and Chen, Bohao and Han, Hua}, title = {Blind2Sound: Self-Supervised Image Denoising without Residual Noise}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12937-12946} }
Sparsity Outperforms Low-Rank Projections in Few-Shot Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mrabah_2025_ICCV, author = {Mrabah, Nairouz and Richet, Nicolas and Ben Ayed, Ismail and Granger, Eric}, title = {Sparsity Outperforms Low-Rank Projections in Few-Shot Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3143-3152} }
Hyper-Depth: Hypergraph-based Multi-Scale Representation Fusion for Monocular Depth Estimation-
[pdf]
[bibtex]@InProceedings{Bie_2025_ICCV, author = {Bie, Lin and Li, Siqi and Feng, Yifan and Gao, Yue}, title = {Hyper-Depth: Hypergraph-based Multi-Scale Representation Fusion for Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5081-5090} }
PVMamba: Parallelizing Vision Mamba via Dynamic State Aggregation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Fei and Wang, Zhongdao and Zhang, Weijia and Ma, Chao}, title = {PVMamba: Parallelizing Vision Mamba via Dynamic State Aggregation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10218-10228} }
Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jieun and Kim, Jinmyeong and Kim, Yoonji and Cho, Sung-Bae}, title = {Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20572-20581} }
SMARTIES: Spectrum-Aware Multi-Sensor Auto-Encoder for Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sumbul_2025_ICCV, author = {Sumbul, Gencer and Xu, Chang and Dalsasso, Emanuele and Tuia, Devis}, title = {SMARTIES: Spectrum-Aware Multi-Sensor Auto-Encoder for Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5569-5578} }
SynFER: Towards Boosting Facial Expression Recognition with Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Xilin and Luo, Cheng and Xian, Xiaole and Li, Bing and Khan, Muhammad Haris and Ge, Zongyuan and Xie, Weicheng and Song, Siyang and Shen, Linlin and Ghanem, Bernard and Yue, Xiangyu}, title = {SynFER: Towards Boosting Facial Expression Recognition with Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10184-10195} }
Boosting Multimodal Learning via Disentangled Gradient Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Shicai and Luo, Chunbo and Luo, Yang}, title = {Boosting Multimodal Learning via Disentangled Gradient Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22879-22888} }
Generating Physically Stable and Buildable Brick Structures from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pun_2025_ICCV, author = {Pun, Ava and Deng, Kangle and Liu, Ruixuan and Ramanan, Deva and Liu, Changliu and Zhu, Jun-Yan}, title = {Generating Physically Stable and Buildable Brick Structures from Text}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14798-14809} }
Bootstrapping Grounded Chain-of-Thought in Multimodal LLMs for Data-Efficient Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Jiaer and Tong, Bingkui and Zang, Yuhang and Shao, Rui and Zhou, Kaiyang}, title = {Bootstrapping Grounded Chain-of-Thought in Multimodal LLMs for Data-Efficient Model Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {208-217} }
DisTime: Distribution-based Time Representation for Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Yingsen and Huang, Zepeng and Zhong, Yujie and Feng, Chengjian and Hu, Jie and Ma, Lin and Liu, Yang}, title = {DisTime: Distribution-based Time Representation for Video Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21961-21971} }
Engage for All: Making Ordinary Image Descriptions Appealing Again!-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yuyan and Jiang, Yifan and Zhou, Li and Cao, Jinghan and Guan, Yu and Yang, Ming and Guo, Qingpei}, title = {Engage for All: Making Ordinary Image Descriptions Appealing Again!}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19342-19352} }
QuantCache: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Junyi and Li, Zhiteng and Hui, Zheng and Zhang, Yulun and Kong, Linghe and Yang, Xiaokang}, title = {QuantCache: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15035-15044} }
DH-FaceVid-1K: A Large-Scale High-Quality Dataset for Face Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Di_2025_ICCV, author = {Di, Donglin and Feng, He and Sun, Wenzhang and Ma, Yongjia and Li, Hao and Chen, Wei and Fan, Lei and Su, Tonghua and Yang, Xun}, title = {DH-FaceVid-1K: A Large-Scale High-Quality Dataset for Face Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12124-12134} }
Instruction-Grounded Visual Projectors for Continual Learning of Generative Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Hyundong and Chang, Hyung Jin and Kim, Eunwoo}, title = {Instruction-Grounded Visual Projectors for Continual Learning of Generative Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3466-3476} }
COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2025_ICCV, author = {Jo, Sanghyun and Lee, Seo Jin and Lee, Seungwoo and Hong, Seohyung and Seo, Hyungseok and Kim, Kyungsu}, title = {COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20324-20335} }
Proactive Scene Decomposition and Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Baicheng and Yan, Zike and Wu, Dong and Zha, Hongbin}, title = {Proactive Scene Decomposition and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9780-9789} }
Estimating 2D Camera Motion with Hybrid Motion Basis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Haipeng and Zhou, Tianhao and Yang, Zhanglei and Wu, Yi and Chen, Yan and Mao, Zijing and Cheng, Shen and Zeng, Bing and Liu, Shuaicheng}, title = {Estimating 2D Camera Motion with Hybrid Motion Basis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7624-7633} }
Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaojie and Wang, Yuanfei and Wu, Ruihai and Xu, Kunqi and Li, Yu and Xiang, Liuyu and Dong, Hao and He, Zhaofeng}, title = {Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13032-13042} }
Predict-Optimize-Distill: A Self-Improving Cycle for 4D Object Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Mingxuan and Huang, Huang and Kerr, Justin and Kim, Chung Min and Zhang, Anthony and Yi, Brent and Kanazawa, Angjoo}, title = {Predict-Optimize-Distill: A Self-Improving Cycle for 4D Object Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6575-6584} }
LHM: Large Animatable Human Reconstruction Model for Single Image to 3D in Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Lingteng and Gu, Xiaodong and Li, Peihao and Zuo, Qi and Shen, Weichao and Zhang, Junfei and Qiu, Kejie and Yuan, Weihao and Chen, Guanying and Dong, Zilong and Bo, Liefeng}, title = {LHM: Large Animatable Human Reconstruction Model for Single Image to 3D in Seconds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14184-14194} }
Fine-grained Spatiotemporal Grounding on Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Shuo and Zhong, Yiwu and Hu, Zi-Yuan and Tao, Yeyao and Wang, Liwei}, title = {Fine-grained Spatiotemporal Grounding on Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9385-9395} }
3D Test-time Adaptation via Graph Spectral Driven Point Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Xin and Yang, Qin and Fang, Yijie and Zhu, Mingrui and Wang, Nannan}, title = {3D Test-time Adaptation via Graph Spectral Driven Point Shift}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26762-26771} }
An Empirical Study of Autoregressive Pre-training from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rajasegaran_2025_ICCV, author = {Rajasegaran, Jathushan and Radosavovic, Ilija and Ravishankar, Rahul and Gandelsman, Yossi and Feichtenhofer, Christoph and Malik, Jitendra}, title = {An Empirical Study of Autoregressive Pre-training from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19108-19118} }
Latent Diffusion Models with Masked AutoEncoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Junho and Shin, Jeongwoo and Choi, Hyungwook and Lee, Joonseok}, title = {Latent Diffusion Models with Masked AutoEncoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17422-17431} }
PlaceIt3D: Language-Guided Object Placement in Real 3D Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Abdelreheem_2025_ICCV, author = {Abdelreheem, Ahmed and Aleotti, Filippo and Watson, Jamie and Qureshi, Zawar and Eldesokey, Abdelrahman and Wonka, Peter and Brostow, Gabriel and Vicente, Sara and Garcia-Hernando, Guillermo}, title = {PlaceIt3D: Language-Guided Object Placement in Real 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6645-6655} }
TR-PTS: Task-Relevant Parameter and Token Selection for Efficient Tuning-
[pdf]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Siqi and Yang, Haoran and Xin, Yi and Yi, Mingyang and Wu, Guangyang and Zhai, Guangtao and Liu, Xiaohong}, title = {TR-PTS: Task-Relevant Parameter and Token Selection for Efficient Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4360-4369} }
StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chuxin and Zha, Yixin and Yang, Wenfei and Zhang, Tianzhu}, title = {StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28546-28555} }
Open-World Skill Discovery from Unsegmented Demonstration Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Jingwen and Wang, Zihao and Cai, Shaofei and Liu, Anji and Liang, Yitao}, title = {Open-World Skill Discovery from Unsegmented Demonstration Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10708-10718} }
Staining and Locking Computer Vision Models Without Retraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sutton_2025_ICCV, author = {Sutton, Oliver J. and Zhou, Qinghua and Leete, George and Gorban, Alexander N. and Tyukin, Ivan Y.}, title = {Staining and Locking Computer Vision Models Without Retraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2346-2355} }
Consistent Time-of-Flight Depth Denoising via Graph-Informed Geometric Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Weida and He, Changyong and Zeng, Jin and Qiu, Di}, title = {Consistent Time-of-Flight Depth Denoising via Graph-Informed Geometric Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5188-5197} }
Can Generative Geospatial Diffusion Models Excel as Discriminative Geospatial Foundation Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Yuru and Marsocci, Valerio and Gong, Ziyang and Yang, Xue and Vergauwen, Maarten and Nascetti, Andrea}, title = {Can Generative Geospatial Diffusion Models Excel as Discriminative Geospatial Foundation Models?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8429-8440} }
End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Liwei and Zhang, Yanduo and Lu, Tao and Liu, Fang and Zhang, Huiqin and Ma, Jiayi and Zhou, Huabing}, title = {End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17729-17738} }
AJAHR: Amputated Joint Aware 3D Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, Hyunjin and Choi, Giyun and Choi, Jongwon}, title = {AJAHR: Amputated Joint Aware 3D Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7925-7935} }
Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Do_2025_ICCV, author = {Do, Jeonghyeok and Kim, Munchurl}, title = {Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12757-12768} }
Fish2Mesh Transformer: 3D Human Mesh Recovery from Egocentric Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Tianma and Puranik, Aditya and Vong, James and Deogirikar, Vrushabh and Fell, Ryan and Dietrich, Julianna and Kyrarini, Maria and Kitts, Christopher and Jeong, David C.}, title = {Fish2Mesh Transformer: 3D Human Mesh Recovery from Egocentric Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6498-6507} }
Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Simonelli_2025_ICCV, author = {Simonelli, Andrea and M\"uller, Norman and Kontschieder, Peter}, title = {Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24707-24716} }
DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Xiaoyi and Xie, Chenwei and Tang, Hao and Weng, Tingyu and Wang, Xiaofeng and Zheng, Yun and Wang, Xingang}, title = {DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23678-23688} }
Interpretable Zero-Shot Learning with Locally-Aligned Vision-Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Shiming and Duan, Bowen and Khan, Salman and Khan, Fahad Shahbaz}, title = {Interpretable Zero-Shot Learning with Locally-Aligned Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {478-487} }
MixANT: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wasim_2025_ICCV, author = {Wasim, Syed Talal and Suleman, Hamid and Zatsarynna, Olga and Naseer, Muzammal and Gall, Juergen}, title = {MixANT: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14613-14622} }
MaGS: Reconstructing and Simulating Dynamic 3D Objects with Mesh-adsorbed Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Shaojie and Luo, Yawei and Yang, Wei and Yang, Yi}, title = {MaGS: Reconstructing and Simulating Dynamic 3D Objects with Mesh-adsorbed Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8745-8755} }
SceneMI: Motion In-betweening for Modeling Human-Scene Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2025_ICCV, author = {Hwang, Inwoo and Zhou, Bing and Kim, Young Min and Wang, Jian and Guo, Chuan}, title = {SceneMI: Motion In-betweening for Modeling Human-Scene Interaction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6034-6045} }
SMoLoRA: Exploring and Defying Dual Catastrophic Forgetting in Continual Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ziqi and Che, Chang and Wang, Qi and Li, Yangyang and Shi, Zenglin and Wang, Meng}, title = {SMoLoRA: Exploring and Defying Dual Catastrophic Forgetting in Continual Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {177-186} }
Self-Supervised Sparse Sensor Fusion for Long Range Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Palladin_2025_ICCV, author = {Palladin, Edoardo and Brucker, Samuel and Ghilotti, Filippo and Narayanan, Praveen and Bijelic, Mario and Heide, Felix}, title = {Self-Supervised Sparse Sensor Fusion for Long Range Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27498-27509} }
GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_ICCV, author = {Tong, Shaowen and Xia, Zimin and Alahi, Alexandre and He, Xuming and Shi, Yujiao}, title = {GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25357-25366} }
Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Tourani_2025_ICCV, author = {Tourani, Siddharth and Reddy, Jayaram and Kumbar, Akash and Tourani, Satyajit and Goyal, Nishant and Krishna, Madhava and Reddy, N Dinesh and Khan, Muhammad Haris}, title = {Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29051-29063} }
SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Yongkun and Chen, Zhineng and Xie, Hongtao and Jia, Caiyan and Jiang, Yu-Gang}, title = {SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20147-20156} }
Blind Noisy Image Deblurring Using Residual Guidance Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Heyan and Sun, Jianing and Liu, Jun and Zhao, Xi-Le and Wu, Tingting and Zeng, Tieyong}, title = {Blind Noisy Image Deblurring Using Residual Guidance Strategy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11016-11025} }
DeGauss: Dynamic-Static Decomposition with Gaussian Splatting for Distractor-free 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Rui and Lohmeyer, Quentin and Meboldt, Mirko and Tang, Siyu}, title = {DeGauss: Dynamic-Static Decomposition with Gaussian Splatting for Distractor-free 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6294-6303} }
Time-Aware Auto White Balance in Mobile Photography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Afifi_2025_ICCV, author = {Afifi, Mahmoud and Zhao, Luxi and Punnappurath, Abhijith and Abdelsalam, Mohamed A. and Zhang, Ran and Brown, Michael S.}, title = {Time-Aware Auto White Balance in Mobile Photography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5038-5047} }
Unlocking the Potential of Diffusion Priors in Blind Face Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV, author = {Miao, Yunqi and Qu, Zhiyu and Gao, Mingqi and Chen, Changrui and Song, Jifei and Han, Jungong and Deng, Jiankang}, title = {Unlocking the Potential of Diffusion Priors in Blind Face Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13471-13480} }
InstaScene: Towards Complete 3D Instance Decomposition and Reconstruction from Cluttered Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zesong and Yang, Bangbang and Dong, Wenqi and Cao, Chenxuan and Cui, Liyuan and Ma, Yuewen and Cui, Zhaopeng and Bao, Hujun}, title = {InstaScene: Towards Complete 3D Instance Decomposition and Reconstruction from Cluttered Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7771-7781} }
Improved Noise Schedule for Diffusion Training-
[pdf]
[supp]
[bibtex]@InProceedings{Hang_2025_ICCV, author = {Hang, Tiankai and Gu, Shuyang and Bao, Jianmin and Wei, Fangyun and Chen, Dong and Geng, Xin and Guo, Baining}, title = {Improved Noise Schedule for Diffusion Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4796-4806} }
ProbRes: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kundu_2025_ICCV, author = {Kundu, Sanjoy and Vellamcheti, Shanmukha and Aakur, Sathyanarayanan N.}, title = {ProbRes: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14128-14140} }
Information Density Principle for MLLM Benchmarks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Chunyi and Li, Xiaozhe and Zhang, Zicheng and Tian, Yuan and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Wang, Jia and Duan, Haodong and Chen, Kai and Zhai, Guangtao}, title = {Information Density Principle for MLLM Benchmarks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4167-4177} }
CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts-
[pdf]
[supp]
[bibtex]@InProceedings{Dunkel_2025_ICCV, author = {D\"unkel, Olaf and Jesslen, Artur and Xie, Jiahao and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam}, title = {CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19978-19988} }
Knowledge-Guided Part Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gou_2025_ICCV, author = {Gou, Xuejian and Liu, Fang and Jiao, Licheng and Li, Shuo and Li, Lingling and Wang, Hao and Liu, Xu and Chen, Puhua and Ma, Wenping}, title = {Knowledge-Guided Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5490-5500} }
CRAM: Large Scale Video Continual Learning with Bootstrapped Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mall_2025_ICCV, author = {Mall, Shivani and Henriques, Joao F.}, title = {CRAM: Large Scale Video Continual Learning with Bootstrapped Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15045-15055} }
DeSPITE: Exploring Contrastive Deep Skeleton-Pointcloud-IMU-Text Embeddings for Advanced Point Cloud Human Activity Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Kreutz_2025_ICCV, author = {Kreutz, Thomas and M\"uhlh\"auser, Max and Guinea, Alejandro Sanchez}, title = {DeSPITE: Exploring Contrastive Deep Skeleton-Pointcloud-IMU-Text Embeddings for Advanced Point Cloud Human Activity Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14633-14643} }
Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Xueqing and Yang, Linjie and Yu, Qihang and Yang, Chenglin and Chen, Liang-Chieh}, title = {Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15107-15116} }
Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Xiangbin and Wang, Yuanfeng and Xu, Ao and Zhu, Lingyu and Sun, Dongyong and Li, Keren and Li, Yang and Qin, Qi}, title = {Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25993-26003} }
I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting-
[pdf]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Zhimin and Wei, Ping and Zhang, Ruijie and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang}, title = {I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25810-25819} }
Meta-Unlearning on Diffusion Models: Preventing Relearning Unlearned Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Hongcheng and Pang, Tianyu and Du, Chao and Hu, Taihang and Deng, Zhijie and Lin, Min}, title = {Meta-Unlearning on Diffusion Models: Preventing Relearning Unlearned Concepts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2131-2141} }
Auto-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ulger_2025_ICCV, author = {\"Ulger, Osman and Kulicki, Maksymilian and Asano, Yuki and Oswald, Martin R.}, title = {Auto-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24266-24275} }
SpinMeRound: Consistent Multi-View Identity Generation Using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galanakis_2025_ICCV, author = {Galanakis, Stathis and Lattas, Alexandros and Moschoglou, Stylianos and Kainz, Bernhard and Zafeiriou, Stefanos}, title = {SpinMeRound: Consistent Multi-View Identity Generation Using Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14346-14356} }
MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yangfu and Zhan, Hongjian and Liu, Qi and Sun, Li and Xiong, Yu-Jie and Lu, Yue}, title = {MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23095-23104} }
IGL-Nav: Incremental 3D Gaussian Localization for Image-goal Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Wenxuan and Xu, Xiuwei and Yin, Hang and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen}, title = {IGL-Nav: Incremental 3D Gaussian Localization for Image-goal Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6808-6817} }
PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Yu and Zhang, Jiahui and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian}, title = {PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26499-26508} }
EA-KD: Entropy-based Adaptive Knowledge Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Chi-Ping and Tseng, Ching-Hsun and Pu, Bin and Zhao, Lei and Yang, Jiewen and Chen, Zhuangzhuang and Lee, Shin-Jye}, title = {EA-KD: Entropy-based Adaptive Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {731-740} }
D3: Training-Free AI-Generated Video Detection Using Second-Order Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chende and Suo, Ruiqi and Lin, Chenhao and Zhao, Zhengyu and Yang, Le and Liu, Shuai and Yang, Minghui and Wang, Cong and Shen, Chao}, title = {D3: Training-Free AI-Generated Video Detection Using Second-Order Features}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12852-12862} }
Background Invariance Testing According to Semantic Proximity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Zukang and Chen, Min}, title = {Background Invariance Testing According to Semantic Proximity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8056-8065} }
Resolving Token-Space Gradient Conflicts: Token Space Manipulation for Transformer-Based Multi-Task Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Wooseong and Yoon, Kuk-Jin}, title = {Resolving Token-Space Gradient Conflicts: Token Space Manipulation for Transformer-Based Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2887-2897} }
ViT-Split: Unleashing the Power of Vision Foundation Models via Efficient Splitting Heads-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yifan and Li, Xin and Li, Tianqin and He, Wenbin and Kong, Yu and Ren, Liu}, title = {ViT-Split: Unleashing the Power of Vision Foundation Models via Efficient Splitting Heads}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1979-1989} }
Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Chen and Bin, Kangcheng and Hu, Ting and Qi, Jiahao and Liu, Xingyue and Liu, Tianpeng and Liu, Zhen and Liu, Yongxiang and Zhong, Ping}, title = {Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27958-27967} }
Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Qingqian and Yan, Peishen and Wu, Xiaoyu and Zhang, Jiaru and Song, Tao and Hua, Yang and Wang, Hao and Wang, Liangliang and Guan, Haibing}, title = {Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29163-29172} }
Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Keming and Chen, Junwen and Liang, Zhanhao and Wang, Yinuo and Li, Ji and Zhang, Chao and Wang, Bin and Yuan, Yuhui}, title = {Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17930-17940} }
BUFFER-X: Towards Zero-Shot Point Cloud Registration in Diverse Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2025_ICCV, author = {Seo, Minkyun and Lim, Hyungtae and Lee, Kanghee and Carlone, Luca and Park, Jaesik}, title = {BUFFER-X: Towards Zero-Shot Point Cloud Registration in Diverse Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3851-3862} }
VITAL: More Understandable Feature Visualization through Distribution Alignment and Relevant Information Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Gorgun_2025_ICCV, author = {G{\"o}rg{\"u}n, Ada and Schiele, Bernt and Fischer, Jonas}, title = {VITAL: More Understandable Feature Visualization through Distribution Alignment and Relevant Information Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4403-4412} }
DisCoPatch: Taming Adversarially-driven Batch Statistics for Improved Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Caetano_2025_ICCV, author = {Caetano, Francisco and Viviers, Christiaan and Zavala-Mondrag{\'o}n, Luis A. and De With, Peter H.N. and van der Sommen, Fons}, title = {DisCoPatch: Taming Adversarially-driven Batch Statistics for Improved Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2898-2908} }
EC-Flow: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yixiang and Li, Peiyan and Huang, Yan and Yang, Jiabing and Chen, Kehan and Wang, Liang}, title = {EC-Flow: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11958-11968} }
One Look is Enough: Seamless Patchwise Refinement for Zero-Shot Monocular Depth Estimation on High-Resolution Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_ICCV, author = {Kwon, Byeongjun and Kim, Munchurl}, title = {One Look is Enough: Seamless Patchwise Refinement for Zero-Shot Monocular Depth Estimation on High-Resolution Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8077-8087} }
Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_ICCV, author = {Hou, Xinyu and Yue, Zongsheng and Li, Xiaoming and Loy, Chen Change}, title = {Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19353-19362} }
Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Wenhan and Guo, Zhishuai and Chen, Chen and Xue, Hongfei and Lu, Aidong}, title = {Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11122-11131} }
CleanPose: Category-Level Object Pose Estimation via Causal Learning and Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Xiao and Peng, Yun and Wang, Liuyi and Zhong, Xianyou and Zhu, Minghao and Feng, Yi and Yang, Jingwei and Liu, Chengju and Chen, Qijun}, title = {CleanPose: Category-Level Object Pose Estimation via Causal Learning and Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5990-6000} }
Lark: Low-Rank Updates After Knowledge Localization for Few-shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Jinxin and Zhao, Jiabao and Yang, Yifan and Wu, Xingjiao and Li, Jiawen and He, Liang}, title = {Lark: Low-Rank Updates After Knowledge Localization for Few-shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3607-3617} }
Articulate3D: Holistic Understanding of 3D Scenes as Universal Scene Description-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Halacheva_2025_ICCV, author = {Halacheva, Anna-Maria and Miao, Yang and Zaech, Jan-Nico and Wang, Xi and Van Gool, Luc and Paudel, Danda Pani}, title = {Articulate3D: Holistic Understanding of 3D Scenes as Universal Scene Description}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5633-5644} }
DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Tianyu and Jiang, Haobo and Yang, Jian and Xie, Jin}, title = {DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27348-27358} }
TryOn-Refiner: Conditional Rectified-flow-based TryOn Refiner for More Accurate Detail Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_ICCV, author = {Qian, Wen}, title = {TryOn-Refiner: Conditional Rectified-flow-based TryOn Refiner for More Accurate Detail Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15669-15679} }
AM-Adapter: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Siyoon and Nam, Jisu and Kim, Jiyoung and Chung, Dahyun and Kim, Yeong-Seok and Park, Joonhyung and Chu, Heonjeong and Kim, Seungryong}, title = {AM-Adapter: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17077-17086} }
Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Runpeng and Ma, Xinyin and Wang, Xinchao}, title = {Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21822-21831} }
Addressing Representation Collapse in Vector Quantized Models with One Linear Layer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yongxin and Li, Bocheng and Xin, Yifei and Xia, Zhihua and Xu, Linli}, title = {Addressing Representation Collapse in Vector Quantized Models with One Linear Layer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22968-22977} }
MosaicDiff: Training-free Structural Pruning for Diffusion Model Acceleration Reflecting Pretraining Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Bowei and Tang, Shengkun and Zeng, Cong and Shen, Zhiqiang}, title = {MosaicDiff: Training-free Structural Pruning for Diffusion Model Acceleration Reflecting Pretraining Dynamics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1655-1664} }
Not Only Vision: Evolve Visual Speech Recognition via Peripheral Information-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Zhaoxin and Yang, Shuang and Shan, Shiguang and Chen, Xilin}, title = {Not Only Vision: Evolve Visual Speech Recognition via Peripheral Information}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3091-3100} }
Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Rongpei and Lang, Jian and Zhong, Ting and Zhou, Fan}, title = {Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22728-22737} }
Trans-Adapter: A Plug-and-Play Framework for Transparent Image Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Yuekun and Li, Haitian and Zhou, Shangchen and Loy, Chen Change}, title = {Trans-Adapter: A Plug-and-Play Framework for Transparent Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15015-15024} }
MetaMorph: Multimodal Understanding and Generation via Instruction Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Tong_2025_ICCV, author = {Tong, Shengbang and Fan, David and Li, Jiachen and Xiong, Yunyang and Chen, Xinlei and Sinha, Koustuv and Rabbat, Michael and LeCun, Yann and Xie, Saining and Liu, Zhuang}, title = {MetaMorph: Multimodal Understanding and Generation via Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17001-17012} }
CHARM3R: Towards Unseen Camera Height Robust Monocular 3D Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2025_ICCV, author = {Kumar, Abhinav and Guo, Yuliang and Zhang, Zhihao and Huang, Xinyu and Ren, Liu and Liu, Xiaoming}, title = {CHARM3R: Towards Unseen Camera Height Robust Monocular 3D Detector}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8777-8788} }
SuperDec: 3D Scene Decomposition with Superquadrics Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fedele_2025_ICCV, author = {Fedele, Elisabetta and Sun, Boyang and Guibas, Leonidas and Pollefeys, Marc and Engelmann, Francis}, title = {SuperDec: 3D Scene Decomposition with Superquadrics Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24625-24635} }
Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Gen and Xiao, Yang and Ji, Jie and Deng, Kaiyuan and Hui, Bo and Guo, Linke and Ma, Xiaolong}, title = {Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19659-19668} }
LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_ICCV, author = {Zhan, Xiaohang and Liu, Dingming}, title = {LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19679-19688} }
TimeFormer: Capturing Temporal Relationships of Deformable 3D Gaussians for Robust Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Dadong and Hou, Zhi and Ke, Zhihui and Yang, Xianghui and Zhou, Xiaobo and Qiu, Tie}, title = {TimeFormer: Capturing Temporal Relationships of Deformable 3D Gaussians for Robust Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8721-8732} }
LLM Thought Divergence and Convergence for Dialogue-Based Image Generation Control-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hui}, title = {LLM Thought Divergence and Convergence for Dialogue-Based Image Generation Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18101-18110} }
LATINO-PRO: LAtent consisTency INverse sOlver with PRompt Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Spagnoletti_2025_ICCV, author = {Spagnoletti, Alessio and Prost, Jean and Almansa, Andr{\'e}s and Papadakis, Nicolas and Pereyra, Marcelo}, title = {LATINO-PRO: LAtent consisTency INverse sOlver with PRompt Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19597-19607} }
Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Wontae and Lee, Keuntek and Cho, Nam Ik}, title = {Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11895-11905} }
Geminio: Language-Guided Gradient Inversion Attacks in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2025_ICCV, author = {Shan, Junjie and Zhao, Ziqi and Lu, Jialin and Zhang, Rui and Yiu, Siu Ming and Chow, Ka-Ho}, title = {Geminio: Language-Guided Gradient Inversion Attacks in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2718-2727} }
DONUT: A Decoder-Only Model for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Knoche_2025_ICCV, author = {Knoche, Markus and de Geus, Daan and Leibe, Bastian}, title = {DONUT: A Decoder-Only Model for Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28903-28912} }
Training-free Generation of Temporally Consistent Rewards from VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Yinuo and Yuan, Jiale and Xu, Zhiyuan and Hao, Xiaoshuai and Zhang, Xinyi and Wu, Kun and Che, Zhengping and Liu, Chi Harold and Tang, Jian}, title = {Training-free Generation of Temporally Consistent Rewards from VLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8133-8143} }
Compression-Aware One-Step Diffusion Model for JPEG Artifact Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Jinpei and Chen, Zheng and Li, Wenbo and Guo, Yong and Zhang, Yulun}, title = {Compression-Aware One-Step Diffusion Model for JPEG Artifact Removal}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14930-14939} }
AnyPortal: Zero-Shot Consistent Video Background Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Wenshuo and Lan, Xicheng and Yang, Shuai}, title = {AnyPortal: Zero-Shot Consistent Video Background Replacement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18990-18999} }
Scene Coordinate Reconstruction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_ICCV, author = {Bian, Wenjing and Barroso-Laguna, Axel and Cavallari, Tommaso and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {Scene Coordinate Reconstruction Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25765-25776} }
Towards a Universal Image Degradation Model via Content-Degradation Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Wenbo and Wang, Zhongling and Wang, Zhou}, title = {Towards a Universal Image Degradation Model via Content-Degradation Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12966-12975} }
Balanced Image Stylization with Style Matching Score-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Yuxin and Jiang, Liming and Yang, Shuai and Liu, Jia-Wei and Tsang, Ivor W. and Shou, Mike Zheng}, title = {Balanced Image Stylization with Style Matching Score}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17346-17355} }
MultiModal Action Conditioned Video Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yichen and Torralba, Antonio}, title = {MultiModal Action Conditioned Video Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14173-14183} }
Breaking Rectangular Shackles: Cross-View Object Segmentation for Fine-Grained Object Geo-Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Qingwang and Zhu, Yingying}, title = {Breaking Rectangular Shackles: Cross-View Object Segmentation for Fine-Grained Object Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8197-8206} }
Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV, author = {Miao, Ziliang and Chen, Runjian and Cai, Yixi and He, Buwei and Zhao, Wenquan and Shao, Wenqi and Zhang, Bo and Zhang, Fu}, title = {Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26653-26663} }
No More Sibling Rivalry: Debiasing Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Bin and Zhang, Yulin and Zhou, Hong-Yu and Yang, Sibei}, title = {No More Sibling Rivalry: Debiasing Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22707-22717} }
Imbalance in Balance: Online Concept Balancing in Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Yukai and Ou, Jiarong and Chen, Rui and Yang, Haotian and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Gai, Kun}, title = {Imbalance in Balance: Online Concept Balancing in Generation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17432-17442} }
Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order Levenberg-Marquardt-Langevin-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Fangyikang and Yin, Hubery and Qian, Lei and Li, Yinan and Zhuang, Shaobin and Zhu, Huminhao and Zhang, Yilin and Tang, Yanlong and Zhang, Chao and Zhao, Hanbin and Qian, Hui and Li, Chen}, title = {Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order Levenberg-Marquardt-Langevin}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10453-10464} }
Temperature in Cosine-based Softmax Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Kobayashi_2025_ICCV, author = {Kobayashi, Takumi}, title = {Temperature in Cosine-based Softmax Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22199-22208} }
SD2Actor: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiayi}, title = {SD2Actor: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13751-13760} }
Free-Form Motion Control: Controlling the 6D Poses of Camera and Objects in Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shuai_2025_ICCV, author = {Shuai, Xincheng and Ding, Henghui and Qin, Zhenyuan and Luo, Hao and Ma, Xingjun and Tao, Dacheng}, title = {Free-Form Motion Control: Controlling the 6D Poses of Camera and Objects in Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12449-12458} }
GaussianProperty: Integrating Physical Properties to 3D Gaussians with LMMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Xinli and Ge, Wenhang and Qiu, Dicong and Chen, ZhiFei and Yan, Dongyu and Liu, Zhuoyun and Zhao, Haoyu and Zhao, Hanfeng and Zhang, Shunsi and Liang, Junwei and Chen, Ying-Cong}, title = {GaussianProperty: Integrating Physical Properties to 3D Gaussians with LMMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7231-7240} }
Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, I-Hsiang and Chang, Hua-En and Chen, Wei-Ting and Hwang, Jenq-Neng and Kuo, Sy-Yen}, title = {Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21755-21765} }
Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yun and Wang, Longguang and Zhang, Chenghao and Zhang, Yongjian and Zhang, Zhanjie and Ma, Ao and Fan, Chenyou and Lam, Tin Lun and Hu, Junjie}, title = {Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21276-21287} }
Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Yitong and Gu, Jinwei and Xue, Tianfan and Cheung, Ka Chun and Molchanov, Pavlo and Yin, Hongxu and Liu, Sifei}, title = {Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24147-24158} }
Controlling Multimodal LLMs via Reward-guided Decoding-
[pdf]
[supp]
[bibtex]@InProceedings{Manas_2025_ICCV, author = {Ma{\~n}as, Oscar and D'Oro, Pierluca and Sinha, Koustuv and Romero-Soriano, Adriana and Drozdzal, Michal and Agrawal, Aishwarya}, title = {Controlling Multimodal LLMs via Reward-guided Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1391-1401} }
Multi-Modal Few-Shot Temporal Action Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Zijia and Elhamifar, Ehsan}, title = {Multi-Modal Few-Shot Temporal Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14106-14116} }
What If: Understanding Motion Through Sparse Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Baumann_2025_ICCV, author = {Baumann, Stefan Andreas and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn}, title = {What If: Understanding Motion Through Sparse Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10286-10296} }
Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goyal_2025_ICCV, author = {Goyal, Bhavya and Gutierrez-Barragan, Felipe and Lin, Wei and Velten, Andreas and Li, Yin and Gupta, Mohit}, title = {Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28417-28427} }
SAMPLE: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jing and Zhao, Rui and Xiong, Ruiqin and Wang, Xingtao and Fan, Xiaopeng and Huang, Tiejun}, title = {SAMPLE: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14409-14419} }
Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with Fourier Integrity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Sung Ju and Cho, Nam Ik}, title = {Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with Fourier Integrity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18759-18769} }
Auto-Regressively Generating Multi-View Consistent Images-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, JiaKui and Yang, Yuxiao and Liu, Jialun and Wu, Jinbo and Zhao, Chen and Lu, Yanye}, title = {Auto-Regressively Generating Multi-View Consistent Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2556-2566} }
TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zuhao and Yu, Yingchen and Zhao, Yunqing and Lu, Shijian and Bai, Song}, title = {TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24286-24296} }
AIM: Amending Inherent Interpretability via Self-Supervised Masking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alshami_2025_ICCV, author = {Alshami, Eyad and Agnihotri, Shashank and Schiele, Bernt and Keuper, Margret}, title = {AIM: Amending Inherent Interpretability via Self-Supervised Masking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {993-1003} }
I Am Big, You Are Little; I Am Right, You Are Wrong-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kelly_2025_ICCV, author = {Kelly, David A. and Chanchal, Akchunya and Blake, Nathan}, title = {I Am Big, You Are Little; I Am Right, You Are Wrong}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {817-826} }
Jigsaw++: Imagining Complete Shape Priors for Object Reassembly-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jiaxin and Hua, Gang and Huang, Qixing}, title = {Jigsaw++: Imagining Complete Shape Priors for Object Reassembly}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6704-6714} }
GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_ICCV, author = {Ye, Baijun and Qin, Minghui and Zhang, Saining and Gong, Moonjun and Zhu, Shaoting and Zhao, Hao and Zhao, Hang}, title = {GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25925-25937} }
CoMatch: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zizhuo and Lu, Yifan and Tang, Linfeng and Zhang, Shihua and Ma, Jiayi}, title = {CoMatch: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18521-18530} }
A Unified Interpretation of Training-Time Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Xu and Jiang, Xin and Li, Zechao}, title = {A Unified Interpretation of Training-Time Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2142-2151} }
Memory-Efficient Generative Models via Product Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2025_ICCV, author = {Shao, Jie and Zhang, Hanxiao and Yu, Hao and Wu, Jianxin}, title = {Memory-Efficient Generative Models via Product Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16871-16881} }
Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Haoyu and Yang, Bin and Huang, Wenke and Du, Bo and Ye, Mang}, title = {Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11916-11926} }
Soft Separation and Distillation: Toward Global Uniformity in Federated Unsupervised Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Hung-Chieh and Lin, Hsuan-Tien and King, Irwin and Zhang, Yifei}, title = {Soft Separation and Distillation: Toward Global Uniformity in Federated Unsupervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2971-2980} }
TaxaDiffusion: Progressively Trained Diffusion Model for Fine-Grained Species Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Monsefi_2025_ICCV, author = {Monsefi, Amin Karimi and Khurana, Mridul and Ramnath, Rajiv and Karpatne, Anuj and Chao, Wei-Lun and Zhang, Cheng}, title = {TaxaDiffusion: Progressively Trained Diffusion Model for Fine-Grained Species Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8579-8589} }
Multimodal LLMs as Customized Reward Models for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Shijie and Zhang, Ruiyi and Zhu, Huaisheng and Kveton, Branislav and Zhou, Yufan and Gu, Jiuxiang and Chen, Jian and Chen, Changyou}, title = {Multimodal LLMs as Customized Reward Models for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19638-19648} }
MEMFOF: High-Resolution Training for Memory-Efficient Multi-Frame Optical Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bargatin_2025_ICCV, author = {Bargatin, Vladislav and Chistov, Egor and Yakovenko, Alexander and Vatolin, Dmitriy}, title = {MEMFOF: High-Resolution Training for Memory-Efficient Multi-Frame Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8187-8196} }
Diving into the Fusion of Monocular Priors for Generalized Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Chengtang and Yu, Lidong and Liu, Zhidan and Zeng, Jiaxi and Wu, Yuwei and Jia, Yunde}, title = {Diving into the Fusion of Monocular Priors for Generalized Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14887-14897} }
RAGNet: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Dongming and Fu, Yanping and Huang, Saike and Liu, Yingfei and Jia, Fan and Liu, Nian and Dai, Feng and Wang, Tiancai and Anwer, Rao Muhammad and Khan, Fahad Shahbaz and Shen, Jianbing}, title = {RAGNet: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11980-11990} }
R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception-
[pdf]
[bibtex]@InProceedings{Mirlach_2025_ICCV, author = {Mirlach, Jonas and Wan, Lei and Wiedholz, Andreas and Keen, Hannan Ejaz and Eich, Andreas}, title = {R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28375-28384} }
Spatially-Varying Autofocus-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Yingsi and Sankaranarayanan, Aswin C. and O'Toole, Matthew}, title = {Spatially-Varying Autofocus}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24645-24654} }
ZeroStereo: Zero-shot Stereo Matching from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xianqi and Yang, Hao and Xu, Gangwei and Cheng, Junda and Lin, Min and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin}, title = {ZeroStereo: Zero-shot Stereo Matching from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28177-28187} }
A3GS: Arbitrary Artistic Style into Arbitrary 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Zhiyuan and Xie, Rengan and Jin, Xuancheng and Ye, Qi and Chen, Wei and Zheng, Wenting and Wang, Rui and Huo, Yuchi}, title = {A3GS: Arbitrary Artistic Style into Arbitrary 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17751-17760} }
SEREP: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Josi_2025_ICCV, author = {Josi, Arthur and Hafemann, Luiz Gustavo and Dib, Abdallah and Got, Emeline and Cruz, Rafael M. O. and Carbonneau, Marc-Andr{\'e}}, title = {SEREP: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14538-14548} }
Visual Modality Prompt for Adapting Vision-Language Object Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Medeiros_2025_ICCV, author = {Medeiros, Heitor R. and Belal, Atif and Muralidharan, Srikanth and Granger, Eric and Pedersoli, Marco}, title = {Visual Modality Prompt for Adapting Vision-Language Object Detectors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2172-2182} }
The Silent Assistant: NoiseQuery as Implicit Guidance for Goal-Driven Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ruoyu and Huang, Huayang and Zhu, Ye and Russakovsky, Olga and Wu, Yu}, title = {The Silent Assistant: NoiseQuery as Implicit Guidance for Goal-Driven Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17618-17628} }
AnyBimanual: Transferring Unimanual Policy for General Bimanual Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Guanxing and Yu, Tengbo and Deng, Haoyuan and Chen, Season Si and Tang, Yansong and Wang, Ziwei}, title = {AnyBimanual: Transferring Unimanual Policy for General Bimanual Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13662-13672} }
Training-Free Industrial Defect Generation with Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Ruyi and Chiu, Yen-Tzu and Chen, Tai-I and Chew, Oscar and Chuang, Yung-Yu and Cheng, Wen-Huang}, title = {Training-Free Industrial Defect Generation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24214-24223} }
Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Minghang and Peng, Yuxin and Sun, Benyuan and Yang, Yi and Liu, Yang}, title = {Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21589-21599} }
Spatial Preference Rewarding for MLLMs Spatial Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Han and Gao, Peng and Lu, Lewei and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian}, title = {Spatial Preference Rewarding for MLLMs Spatial Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {720-730} }
Correspondence as Video: Test-Time Adaption on SAM2 for Reference Segmentation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haoran and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan}, title = {Correspondence as Video: Test-Time Adaption on SAM2 for Reference Segmentation in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8177-8186} }
Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Giwon and Jeong, Wooseong and Park, Daehee and Jeong, Jaewoo and Yoon, Kuk-Jin}, title = {Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28610-28621} }
FreeFlux: Understanding and Exploiting Layer-Specific Roles in RoPE-Based MMDiT for Versatile Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Tianyi and Zhou, Yifan and Chen, Dongdong and Pan, Xingang}, title = {FreeFlux: Understanding and Exploiting Layer-Specific Roles in RoPE-Based MMDiT for Versatile Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16745-16754} }
WalkVLM: Aid Visually Impaired People Walking by Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Zhiqiang and Zhang, Ting and Zhu, Yeshuang and Zhang, Jiapei and Deng, Ying and Jia, Zexi and Luo, Peixiang and Duan, Xiaoyue and Zhou, Jie and Zhang, Jinchao}, title = {WalkVLM: Aid Visually Impaired People Walking by Vision Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9845-9854} }
WildSeg3D: Segment Any 3D Objects in the Wild from 2D Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Yansong and Hu, Jie and Qu, Yansong and Cao, Liujuan}, title = {WildSeg3D: Segment Any 3D Objects in the Wild from 2D Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5166-5176} }
CAPTURE: Evaluating Spatial Reasoning in Vision Language Models via Occluded Object Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pothiraj_2025_ICCV, author = {Pothiraj, Atin and Stengel-Eskin, Elias and Cho, Jaemin and Bansal, Mohit}, title = {CAPTURE: Evaluating Spatial Reasoning in Vision Language Models via Occluded Object Counting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8001-8010} }
DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Ming and Cheng, Wenxuan and Liu, Jiang-jiang and Yang, Sen and Cai, Wenxiao and Sun, Yanpeng and Yang, Wankou}, title = {DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19936-19946} }
PersPose: 3D Human Pose Estimation with Perspective Encoding and Perspective Rotation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hao_2025_ICCV, author = {Hao, Xiaoyang and Li, Han}, title = {PersPose: 3D Human Pose Estimation with Perspective Encoding and Perspective Rotation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8110-8119} }
Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Joonghyuk and Hwang, Alchan and Kim, Yujin and Kim, Daneul and Park, Jaesik}, title = {Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19492-19502} }
IM-LUT: Interpolation Mixing Look-Up Tables for Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Sejin and Lee, Sangmin and Jin, Kyong Hwan and Jung, Seung-Won}, title = {IM-LUT: Interpolation Mixing Look-Up Tables for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14317-14325} }
TeEFusion: Blending Text Embeddings to Distill Classifier-Free Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Minghao and Wang, Guo-Hua and Chen, Xiaohao and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu}, title = {TeEFusion: Blending Text Embeddings to Distill Classifier-Free Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16652-16661} }
Scaling Omni-modal Pretraining with Multimodal Context: Advancing Universal Representation Learning Across Modalities-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu}, title = {Scaling Omni-modal Pretraining with Multimodal Context: Advancing Universal Representation Learning Across Modalities}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1336-1348} }
ZIM: Zero-Shot Image Matting for Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Beomyoung and Shin, Chanyong and Jeong, Joonhyun and Jung, Hyungsik and Lee, Se-Yun and Chun, Sewhan and Hwang, Dong-Hyun and Yu, Joonsang}, title = {ZIM: Zero-Shot Image Matting for Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23828-23838} }
LayerD: Decomposing Raster Graphic Designs into Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Suzuki_2025_ICCV, author = {Suzuki, Tomoyuki and Liu, Kang-Jun and Inoue, Naoto and Yamaguchi, Kota}, title = {LayerD: Decomposing Raster Graphic Designs into Layers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17783-17792} }
ALOcc: Adaptive Lifting-Based 3D Semantic Occupancy and Cost Volume-Based Flow Predictions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Dubing and Fang, Jin and Han, Wencheng and Cheng, Xinjing and Yin, Junbo and Xu, Chengzhong and Khan, Fahad Shahbaz and Shen, Jianbing}, title = {ALOcc: Adaptive Lifting-Based 3D Semantic Occupancy and Cost Volume-Based Flow Predictions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4156-4166} }
Heuristic-Induced Multimodal Risk Distribution Jailbreak Attack for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Teng and Jia, Xiaojun and Duan, Ranjie and Li, Xinfeng and Huang, Yihao and Jia, Xiaoshuang and Chu, Zhixuan and Ren, Wenqi}, title = {Heuristic-Induced Multimodal Risk Distribution Jailbreak Attack for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2686-2696} }
Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Weili and Huang, Ziyuan and Ji, Kaixiang and Yan, Yichao}, title = {Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21384-21397} }
Enrich and Detect: Video Temporal Grounding with Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Pramanick_2025_ICCV, author = {Pramanick, Shraman and Mavroudi, Effrosyni and Song, Yale and Chellappa, Rama and Torresani, Lorenzo and Afouras, Triantafyllos}, title = {Enrich and Detect: Video Temporal Grounding with Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24297-24308} }
Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin}, title = {Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21645-21654} }
DexVLG: Dexterous Vision-Language-Grasp Model at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Jiawei and Li, Danshi and Yu, Xinqiang and Qi, Zekun and Zhang, Wenyao and Chen, Jiayi and Zhang, Zhaoxiang and Zhang, Zhizheng and Yi, Li and Wang, He}, title = {DexVLG: Dexterous Vision-Language-Grasp Model at Scale}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14248-14258} }
FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yiting and Liu, Fayao and Liao, Jingyi and Tian, Sichao and Foo, Chuan-Sheng and Yang, Xulei}, title = {FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23290-23299} }
MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Bin and Tang, Hao and Duan, Bin and Cai, Dawen and Yan, Yan and Agam, Gady}, title = {MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24423-24433} }
Boundary Probing for Input Privacy Protection When Using LMM Services-
[pdf]
[supp]
[bibtex]@InProceedings{Hui_2025_ICCV, author = {Hui, Xiaofei and Qu, Haoxuan and Hu, Ping and Rahmani, Hossein and Liu, Jun}, title = {Boundary Probing for Input Privacy Protection When Using LMM Services}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {467-477} }
Knowledge Distillation with Refined Logits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Wujie and Chen, Defang and Lyu, Siwei and Chen, Genlang and Chen, Chun and Wang, Can}, title = {Knowledge Distillation with Refined Logits}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1110-1119} }
Tracking Tiny Drones against Clutter: Large-Scale Infrared Benchmark with Motion-Centric Adaptive Algorithm-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jiahao and Jiang, Zongli and Zhang, Jinli and Wei, Yixin and Li, Liang and Wang, Yizheng and Wang, Gang}, title = {Tracking Tiny Drones against Clutter: Large-Scale Infrared Benchmark with Motion-Centric Adaptive Algorithm}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7361-7371} }
AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_ICCV, author = {Xing, Zhen and Dai, Qi and Weng, Zejia and Wu, Zuxuan and Jiang, Yu-Gang}, title = {AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21243-21253} }
Enhancing Few-Shot Vision-Language Classification with Large Multimodal Model Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mitra_2025_ICCV, author = {Mitra, Chancharik and Huang, Brandon and Chai, Tianning and Lin, Zhiqiu and Arbelle, Assaf and Feris, Rogerio and Karlinsky, Leonid and Darrell, Trevor and Ramanan, Deva and Herzig, Roei}, title = {Enhancing Few-Shot Vision-Language Classification with Large Multimodal Model Features}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2760-2772} }
DeepShield: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Yinqi and Li, Jichang and Li, Zhaolun and Chen, Weikai and Lan, Rushi and Xie, Xi and Luo, Xiaonan and Li, Guanbin}, title = {DeepShield: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12524-12534} }
Synthesizing Near-Boundary OOD Samples for Out-of-Distribution Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jinglun and Jiang, Kaixun and Chen, Zhaoyu and Lin, Bo and Tang, Yao and Ge, Weifeng and Zhang, Wenqiang}, title = {Synthesizing Near-Boundary OOD Samples for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4496-4506} }
Splat-based 3D Scene Reconstruction with Extreme Motion-blur-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_ICCV, author = {Jang, Hyeonjoong and Choi, Dongyoung and Kim, Donggun and Kang, Woohyun and Kim, Min H.}, title = {Splat-based 3D Scene Reconstruction with Extreme Motion-blur}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26425-26434} }
Towards Robustness of Person Search against Corruptions-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Woojung and Cho, Yoonki and An, Guoyuan and Lee, Chanmi and Yoon, Sung-Eui}, title = {Towards Robustness of Person Search against Corruptions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23408-23418} }
INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Yunjiang and Li, Lingzhi and Wang, Jin and Ouyang, Yupeng and Yang, Benyuan}, title = {INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25464-25473} }
DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Chen and Shi, Shaoshuai and Sheng, Kehua and Zhang, Bo and Jiang, Li}, title = {DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28599-28609} }
MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Xingbo and Wang, Xuanmin and Wu, Hao and Ping, Chengliang and Zhang, Doudou and Xiong, Hui}, title = {MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25325-25334} }
EDFFDNet: Towards Accurate and Efficient Unsupervised Multi-Grid Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Haokai and Qu, Bo and Cao, Si-Yuan and Zhang, Runmin and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang}, title = {EDFFDNet: Towards Accurate and Efficient Unsupervised Multi-Grid Image Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5102-5111} }
RadGPT: Constructing 3D Image-Text Tumor Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Bassi_2025_ICCV, author = {Bassi, Pedro R. A. S. and Yavuz, Mehmet Can and Hamamci, Ibrahim Ethem and Er, Sezgin and Chen, Xiaoxi and Li, Wenxuan and Menze, Bjoern and Decherchi, Sergio and Cavalli, Andrea and Wang, Kang and Yang, Yang and Yuille, Alan and Zhou, Zongwei}, title = {RadGPT: Constructing 3D Image-Text Tumor Datasets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23720-23730} }
Adaptive Prompt Learning via Gaussian Outlier Synthesis for Out-of-distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yongkang and She, Dongyu and Zhou, Zhong}, title = {Adaptive Prompt Learning via Gaussian Outlier Synthesis for Out-of-distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3235-3244} }
LangBridge: Interpreting Image as a Combination of Language Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Jiaqi and Niu, Yuwei and Meng, Fanqing and Li, Hao and Tian, Changyao and Du, Yinuo and Xiong, Yuwen and Li, Dianqi and Zhu, Xizhou and Yuan, Li and Dai, Jifeng and Cheng, Yu}, title = {LangBridge: Interpreting Image as a Combination of Language Embeddings}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23752-23762} }
Anchor Token Matching: Implicit Structure Locking for Training-free AR Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Taihang and Li, Linxuan and Wang, Kai and Wang, Yaxing and Yang, Jian and Cheng, Ming-Ming}, title = {Anchor Token Matching: Implicit Structure Locking for Training-free AR Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18166-18176} }
ArgoTweak: Towards Self-Updating HD Maps through Structured Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wild_2025_ICCV, author = {Wild, Lena and Valencia, Rafael and Jensfelt, Patric}, title = {ArgoTweak: Towards Self-Updating HD Maps through Structured Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6091-6100} }
FedMVP: Federated Multimodal Visual Prompt Tuning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singha_2025_ICCV, author = {Singha, Mainak and Roy, Subhankar and Mehrotra, Sarthak and Jha, Ankit and Abdar, Moloud and Banerjee, Biplab and Ricci, Elisa}, title = {FedMVP: Federated Multimodal Visual Prompt Tuning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17869-17878} }
Robust Dataset Condensation using Supervised Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Nicole Hee-Yeon and Song, Hwanjun}, title = {Robust Dataset Condensation using Supervised Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2857-2866} }
TIP-I2V: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Wenhao and Yang, Yi}, title = {TIP-I2V: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14898-14908} }
From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jiacheng and Zou, Chang and Lyu, Yuanhuiyi and Chen, Junjie and Zhang, Linfeng}, title = {From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15853-15863} }
VisNumBench: Evaluating Number Sense of Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2025_ICCV, author = {Weng, Tengjin and Wang, Jingyi and Jiang, Wenhao and Ming, Zhong}, title = {VisNumBench: Evaluating Number Sense of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3830-3840} }
Invisible Watermarks, Visible Gains: Steering Machine Unlearning with Bi-Level Watermarking Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Yuhao and Zhang, Yihua and Liu, Gaowen and Xie, Hongtao and Liu, Sijia}, title = {Invisible Watermarks, Visible Gains: Steering Machine Unlearning with Bi-Level Watermarking Design}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2417-2428} }
FICGen: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Wenzhuang and Zhao, Yifan and Ma, Mingcan and Liu, Ming and Jiang, Zhonglin and Chen, Yong and Li, Jia}, title = {FICGen: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19097-19107} }
Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Bingqing and Cao, Zhuo and Du, Heming and Li, Yang and Li, Xue and Liu, Jiajun and Wang, Sen}, title = {Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22120-22130} }
ShortV: Efficient Multimodal Large Language Models by Freezing Visual Tokens in Ineffective Layers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Qianhao and Zhang, Qingyu and Liu, Yanjiang and Chen, Jiawei and Lu, Yaojie and Lin, Hongyu and Zheng, Jia and Han, Xianpei and Sun, Le}, title = {ShortV: Efficient Multimodal Large Language Models by Freezing Visual Tokens in Ineffective Layers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {329-339} }
SpikeDiff: Zero-shot High-Quality Video Reconstruction from Chromatic Spike Camera and Sub-millisecond Spike Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Siqi and Liang, Jinxiu and Huang, Zhaojun and Xiaokaiti, Yeliduosi and Chang, Yakun and Yu, Zhaofei and Shi, Boxin}, title = {SpikeDiff: Zero-shot High-Quality Video Reconstruction from Chromatic Spike Camera and Sub-millisecond Spike Streams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7905-7914} }
Selective Contrastive Learning for Weakly Supervised Affordance Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2025_ICCV, author = {Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil}, title = {Selective Contrastive Learning for Weakly Supervised Affordance Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5210-5220} }
GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kapse_2025_ICCV, author = {Kapse, Saarthak and Pati, Pushpak and Yellapragada, Srikar and Das, Srijan and Gupta, Rajarsi R. and Saltz, Joel and Samaras, Dimitris and Prasanna, Prateek}, title = {GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20020-20030} }
Single-Scanline Relative Pose Estimation for Rolling Shutter Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hruby_2025_ICCV, author = {Hruby, Petr and Pollefeys, Marc}, title = {Single-Scanline Relative Pose Estimation for Rolling Shutter Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7143-7153} }
Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Soonbin and Shu, Fangwen and Sanchez, Yago and Schierl, Thomas and Hellge, Cornelius}, title = {Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25496-25505} }
CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Xinyi and Zhang, Steve and Lin, Weizhe and Zhang, Aaron and Mayol-Cuevas, Walterio W. and Liu, Yunze and Shen, Junxiao}, title = {CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29064-29074} }
Real3D: Towards Scaling Large Reconstruction Models with Real Images-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Hanwen and Huang, Qixing and Pavlakos, Georgios}, title = {Real3D: Towards Scaling Large Reconstruction Models with Real Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5821-5833} }
FA: Forced Prompt Learning of Vision-Language Models for Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Xinhua and Lai, Runhe and Wu, Yanqi and Chen, Kanghao and Zheng, Wei-Shi and Wang, Ruixuan}, title = {FA: Forced Prompt Learning of Vision-Language Models for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1152-1161} }
Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea}, title = {Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20658-20671} }
REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Haonan and Yang, Rui and Liao, Huan and Xing, Jiankai and Xu, Zunnan and Yu, Xiaoming and Zha, Junwei and Li, Xiu and Li, Wanhua}, title = {REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25367-25377} }
VACE: All-in-One Video Creation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Zeyinzi and Han, Zhen and Mao, Chaojie and Zhang, Jingfeng and Pan, Yulin and Liu, Yu}, title = {VACE: All-in-One Video Creation and Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17191-17202} }
SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiahui and Liu, Zuyan and Rao, Yongming and Lu, Jiwen}, title = {SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23177-23187} }
Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hongyi and Bose, Laurie and Chen, Jianing and Dudek, Piotr and Mayol-Cuevas, Walterio}, title = {Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29031-29039} }
Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shaojie and Yang, Jiahui and Yin, Jianqin and Luo, Zhenbo and Luan, Jian}, title = {Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22056-22065} }
Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_ICCV, author = {Bian, Yuan and Liu, Min and Yi, Yunqi and Wang, Xueping and Jiang, Shuai and Wang, Yaonan}, title = {Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22599-22609} }
OV-SCAN: Semantically Consistent Alignment for Novel Object Discovery in Open-Vocabulary 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chow_2025_ICCV, author = {Chow, Adrian and Riddell, Evelien and Wang, Yimu and Sedwards, Sean and Czarnecki, Krzysztof}, title = {OV-SCAN: Semantically Consistent Alignment for Novel Object Discovery in Open-Vocabulary 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7990-8000} }
ContextFace: Generating Facial Expressions from Emotional Contexts-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Min-jung and Kim, Minsang and Baek, Seung Jun}, title = {ContextFace: Generating Facial Expressions from Emotional Contexts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11383-11392} }
MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching-
[pdf]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Eunjin and Jo, HyungGi and Kwon, Wookyong and Lee, Sang Jun}, title = {MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26178-26187} }
The Inter-Intra Modal Measure: A Predictive Lens on Fine-Tuning Outcomes in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niss_2025_ICCV, author = {Niss, Laura and Vogt-Lowell, Kevin and Tsiligkaridis, Theodoros}, title = {The Inter-Intra Modal Measure: A Predictive Lens on Fine-Tuning Outcomes in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2396-2406} }
Error Recognition in Procedural Videos using Generalized Task Graph-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Shih-Po and Elhamifar, Ehsan}, title = {Error Recognition in Procedural Videos using Generalized Task Graph}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10009-10021} }
Reangle-A-Video: 4D Video Generation as Video-to-Video Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Hyeonho and Lee, Suhyeon and Ye, Jong Chul}, title = {Reangle-A-Video: 4D Video Generation as Video-to-Video Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11164-11175} }
DualReal: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Wenchuan and Huang, Mengqi and Tu, Yijing and Mao, Zhendong}, title = {DualReal: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16565-16575} }
RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Baihui and Feng, Chengjian and Huang, Zhijian and Yan, Feng and Zhong, Yujie and Ma, Lin}, title = {RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27380-27389} }
Towards Scalable Spatial Intelligence via 2D-to-3D Data Lifting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV, author = {Miao, Xingyu and Duan, Haoran and Qian, Quanhao and Wang, Jiuniu and Long, Yang and Shao, Ling and Zhao, Deli and Xu, Ran and Zhang, Gongjie}, title = {Towards Scalable Spatial Intelligence via 2D-to-3D Data Lifting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {945-959} }
LDIP: Long Distance Information Propagation for Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Bernasconi_2025_ICCV, author = {Bernasconi, Michael and Djelouah, Abdelaziz and Zhang, Yang and Gross, Markus and Schroers, Christopher}, title = {LDIP: Long Distance Information Propagation for Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11558-11567} }
Cross-Architecture Distillation Made Simple with Redundancy Suppression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weijia and Liu, Yuehao and Ran, Wu and Ma, Chao}, title = {Cross-Architecture Distillation Made Simple with Redundancy Suppression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23256-23266} }
M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yan and Xu, Yang and Chen, Changhao and Shi, Zhongchen and Chen, Wei and Xie, Liang and Chen, Hongbo and Yin, Erwei}, title = {M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28207-28216} }
ShortFT: Diffusion Model Alignment via Shortcut-based Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Xiefan and Cui, Miaomiao and Bo, Liefeng and Huang, Di}, title = {ShortFT: Diffusion Model Alignment via Shortcut-based Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {678-687} }
Who Controls the Authorization? Invertible Networks for Copyright Protection in Text-to-Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Baoyue and Wei, Yang and Xiao, Junhao and Huang, Wendong and Bi, Xiuli and Xiao, Bin}, title = {Who Controls the Authorization? Invertible Networks for Copyright Protection in Text-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15832-15841} }
Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_ICCV, author = {Jang, SungMin and Kim, Wonjun}, title = {Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20467-20476} }
IDEATOR: Jailbreaking and Benchmarking Large Vision-Language Models Using Themselves-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ruofan and Li, Juncheng and Wang, Yixu and Wang, Bo and Wang, Xiaosen and Teng, Yan and Wang, Yingchun and Ma, Xingjun and Jiang, Yu-Gang}, title = {IDEATOR: Jailbreaking and Benchmarking Large Vision-Language Models Using Themselves}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8875-8884} }
Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jianing and Zhu, Jiayi and Ji, Feiyu and Yang, Xiaokang and Yuan, Xiaoyun}, title = {Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25914-25924} }
Driving View Synthesis on Free-form Trajectories with Generative Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zeyu and Pan, Zijie and Yang, Yuankun and Zhu, Xiatian and Zhang, Li}, title = {Driving View Synthesis on Free-form Trajectories with Generative Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28083-28092} }
AnimeGamer: Infinite Anime Life Simulation with Next Game State Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Junhao and Ge, Yuying and Ge, Yixiao and Liao, Jing and Shan, Ying}, title = {AnimeGamer: Infinite Anime Life Simulation with Next Game State Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10875-10885} }
OmniPaint: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Yongsheng and Zeng, Ziyun and Zheng, Haitian and Luo, Jiebo}, title = {OmniPaint: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17324-17334} }
UIPro: Unleashing Superior Interaction Capability For GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hongxin and Su, Jingran and Chen, Jingfan and Ju, Zheng and Chen, Yuntao and Li, Qing and Zhang, Zhaoxiang}, title = {UIPro: Unleashing Superior Interaction Capability For GUI Agents}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1613-1623} }
Class-Wise Federated Averaging for Efficient Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Gyuejeong and Choi, Daeyoung}, title = {Class-Wise Federated Averaging for Efficient Personalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1773-1782} }
StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kheradmand_2025_ICCV, author = {Kheradmand, Shakiba and Vicini, Delio and Kopanas, George and Lagun, Dmitry and Yi, Kwang Moo and Matthews, Mark and Tagliasacchi, Andrea}, title = {StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26326-26335} }
A0: An Affordance-Aware Hierarchical Model for General Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Rongtao and Zhang, Jian and Guo, Minghao and Wen, Youpeng and Yang, Haoting and Lin, Min and Huang, Jianzheng and Li, Zhe and Zhang, Kaidong and Wang, Liqiong and Kuang, Yuxuan and Cao, Meng and Zheng, Feng and Liang, Xiaodan}, title = {A0: An Affordance-Aware Hierarchical Model for General Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13491-13501} }
FinMMR: Make Financial Numerical Reasoning More Multimodal, Comprehensive, and Challenging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Zichen and E, Haihong and Liu, Jiacheng and Yang, Zhongjun and Li, Rongjin and Rong, Zihua and He, Haoyang and Hao, Zhuodi and Hu, Xinyang and Ji, Kun and Ma, Ziyan and Ji, Mengyuan and Zhang, Jun and Ma, Chenghao and Zheng, Qianhe and Liu, Yang and Huang, Yiling and Hu, Xinyi and Huang, Qing and Xie, Zijian and Peng, Shiyao}, title = {FinMMR: Make Financial Numerical Reasoning More Multimodal, Comprehensive, and Challenging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3245-3257} }
A Tiny Change, A Giant Leap: Long-Tailed Class-Incremental Learning via Geometric Prototype Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_ICCV, author = {Lai, Xinyi and Lin, Luojun and Chen, Weijie and Yu, Yuanlong}, title = {A Tiny Change, A Giant Leap: Long-Tailed Class-Incremental Learning via Geometric Prototype Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1444-1453} }
X-Prompt: Generalizable Auto-Regressive Visual Learning with In-Context Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Zeyi and Chu, Ziyang and Zhang, Pan and Wu, Tong and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi}, title = {X-Prompt: Generalizable Auto-Regressive Visual Learning with In-Context Prompting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17268-17280} }
HORT: Monocular Hand-held Objects Reconstruction with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zerui and Potamias, Rolandos Alexandros and Chen, Shizhe and Schmid, Cordelia}, title = {HORT: Monocular Hand-held Objects Reconstruction with Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6046-6057} }
Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Joowon and Lee, Ziseok and Cho, Donghyeon and Jo, Sanghyun and Jung, Yeonsung and Kim, Kyungsu and Yang, Eunho}, title = {Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18844-18854} }
Identity Preserving 3D Head Stylization with Multiview Score Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Bilecen_2025_ICCV, author = {Bilecen, Bahri Batuhan and G{\"o}kmen, Ahmet Berke and Guzelant, Furkan and Dundar, Aysegul}, title = {Identity Preserving 3D Head Stylization with Multiview Score Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12169-12179} }
Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yang and Feng, Wentao and Liu, Zhuoyao and Huang, Shudong and Lv, Jiancheng}, title = {Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21679-21688} }
A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Chensheng and Sobol, Ido and Tomizuka, Masayoshi and Keutzer, Kurt and Xu, Chenfeng and Litany, Or}, title = {A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28707-28717} }
Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Weiwei and Zhang, Jianpeng and Shui, Zhongyi and Wang, Sinuo and Chen, Zeli and Li, Xi and Lu, Le and Ye, Xianghua and Zhang, Qi and Liang, Tingbo and Zhang, Ling}, title = {Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23041-23050} }
GAP: Gaussianize Any Point Clouds with Text Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Zhang, Wenyuan and Liu, Yu-Shen}, title = {GAP: Gaussianize Any Point Clouds with Text Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25627-25638} }
Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ruyang and Sun, Shangkun and Tang, Haoran and Gao, Wei and Li, Ge}, title = {Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23817-23827} }
Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yi and Ge, Yuying and Tang, Weiliang and Li, Yizhuo and Ge, Yixiao and Ding, Mingyu and Shan, Ying and Liu, Xihui}, title = {Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19752-19763} }
Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Lei-Lei and Fang, Jianwu and Xiao, Junbin and Pang, Shanmin and Yu, Hongkai and Lv, Chen and Xue, Jianru and Chua, Tat-Seng}, title = {Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11208-11218} }
BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Zipei and Jiang, Junzhe and Chen, Yurui and Zhang, Li}, title = {BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25519-25528} }
RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Teng and Zheng, Guangcong and Jiang, Rui and Zhan, Shuigen and Wu, Tao and Lu, Yehao and Lin, Yining and Deng, Chuanyun and Xiong, Yepan and Chen, Min and Cheng, Lin and Li, Xi}, title = {RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28785-28796} }
Breaking the Encoder Barrier for Seamless Video-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Handong and Zhang, Yiyuan and Guo, Longteng and Yue, Xiangyu and Liu, Jing}, title = {Breaking the Encoder Barrier for Seamless Video-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23167-23176} }
Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Fengchen and Zhao, Dayang and Xu, Hao and Quan, Tingwei and Zeng, Shaoqun}, title = {Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26106-26115} }
How Would It Sound? Material-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saad_2025_ICCV, author = {Saad, Mahnoor Fatima and Al-Halah, Ziad}, title = {How Would It Sound? Material-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12232-12241} }
SplatTalk: 3D VQA with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thai_2025_ICCV, author = {Thai, Anh and Peng, Songyou and Genova, Kyle and Guibas, Leonidas and Funkhouser, Thomas}, title = {SplatTalk: 3D VQA with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4712-4721} }
LocalDyGS: Multi-view Global Dynamic Scene Modeling via Adaptive Local Implicit Feature Decoupling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jiahao and Peng, Rui and Jiao, Jianbo and Yang, Jiayu and Tang, Luyang and Xiong, Kaiqiang and Liang, Jie and Yan, Jinbo and Liu, Runling and Wang, Ronggang}, title = {LocalDyGS: Multi-view Global Dynamic Scene Modeling via Adaptive Local Implicit Feature Decoupling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9519-9529} }
Teeth Reconstruction and Performance Capture Using a Phone Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Weixi and Ling, Jingwang and Wang, Zhibo and Wang, Quan and Xu, Feng}, title = {Teeth Reconstruction and Performance Capture Using a Phone Camera}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9998-10008} }
Knowledge Distillation for Learned Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yunuo and Lyu, Zezheng and He, Bing and Cao, Ning and Chen, Gang and Lu, Guo and Zhang, Wenjun}, title = {Knowledge Distillation for Learned Image Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4996-5006} }
Training-Free Class Purification for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Qi and Yang, Lingxiao and Chen, Yun and Zhao, Nailong and Lai, Jianhuang and Shao, Jie and Xie, Xiaohua}, title = {Training-Free Class Purification for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23124-23134} }
Unsupervised Identification of Protein Compositions and Conformations via Implicit Content-Transformation Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Uddin_2025_ICCV, author = {Uddin, Mostofa Rafid and Armouti, Jana and Xu, Min}, title = {Unsupervised Identification of Protein Compositions and Conformations via Implicit Content-Transformation Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7483-7493} }
VIGFace: Virtual Identity Generation for Privacy-Free Face Recognition Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Minsoo and Sagong, Min-Cheol and Nam, Gi Pyo and Cho, Junghyun and Kim, Ig-Jae}, title = {VIGFace: Virtual Identity Generation for Privacy-Free Face Recognition Dataset}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10043-10053} }
Generalizable Object Re-Identification via Visual In-Context Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Zhizhong and Liu, Xiaoming}, title = {Generalizable Object Re-Identification via Visual In-Context Prompting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22539-22550} }
GenHaze: Pioneering Controllable One-Step Realistic Haze Generation for Real-World Dehazing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Sixiang and Ye, Tian and Lin, Yunlong and Jin, Yeying and Yang, Yijun and Chen, Haoyu and Lai, Jianyu and Fei, Song and Xing, Zhaohu and Tsung, Fugee and Zhu, Lei}, title = {GenHaze: Pioneering Controllable One-Step Realistic Haze Generation for Real-World Dehazing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9194-9205} }
LLaVA-CoT: Let Vision Language Models Reason Step-by-Step-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Guowei and Jin, Peng and Wu, Ziang and Li, Hao and Song, Yibing and Sun, Lichao and Yuan, Li}, title = {LLaVA-CoT: Let Vision Language Models Reason Step-by-Step}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2087-2098} }
O-MaMa: Learning Object Mask Matching between Egocentric and Exocentric Views-
[pdf]
[supp]
[bibtex]@InProceedings{Mur-Labadia_2025_ICCV, author = {Mur-Labadia, Lorenzo and Santos-Villafranca, Maria and Bermudez-Cameo, Jesus and Perez-Yus, Alejandro and Martinez-Cantin, Ruben and Guerrero, Jose J.}, title = {O-MaMa: Learning Object Mask Matching between Egocentric and Exocentric Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6892-6903} }
Rethinking the Embodied Gap in Vision-and-Language Navigation: A Holistic Study of Physical and Visual Disparities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Liuyi and Xia, Xinyuan and Zhao, Hui and Wang, Hanqing and Wang, Tai and Chen, Yilun and Liu, Chengju and Chen, Qijun and Pang, Jiangmiao}, title = {Rethinking the Embodied Gap in Vision-and-Language Navigation: A Holistic Study of Physical and Visual Disparities}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9455-9465} }
Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Dong and Zang, Qi and Wang, Shuang and Sebe, Nicu and Zhong, Zhun}, title = {Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22393-22403} }
MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yingyue and Liao, Bencheng and Liu, Wenyu and Wang, Xinggang}, title = {MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20878-20888} }
ETCH: Generalizing Body Fitting to Clothed Humans via Equivariant Tightness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Boqian and Feng, Haiwen and Cai, Zeyu and Black, Michael J. and Xiu, Yuliang}, title = {ETCH: Generalizing Body Fitting to Clothed Humans via Equivariant Tightness}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8264-8274} }
SVIP: Semantically Contextualized Visual Patches for Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhi and Zhao, Zecheng and Guo, Jingcai and Li, Jingjing and Huang, Zi}, title = {SVIP: Semantically Contextualized Visual Patches for Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3346-3356} }
MamTiff-CAD: Multi-Scale Latent Diffusion with Mamba+ for Complex Parametric Sequence-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Liyuan and Bai, Yunpeng and Dai, Yongkang and Huang, Xiaoshui and Gan, Hongping and Huang, Dongshuo and Jiacheng, Hao and Shi, Yilei}, title = {MamTiff-CAD: Multi-Scale Latent Diffusion with Mamba+ for Complex Parametric Sequence}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10517-10526} }
Forgetting Through Transforming: Enabling Federated Unlearning via Class-Aware Representation Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Qi and Tian, Zhen and Yao, Minghao and Qi, Saiyu and Qi, Yong and Liu, Bingyi}, title = {Forgetting Through Transforming: Enabling Federated Unlearning via Class-Aware Representation Transformation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1474-1483} }
ReasonVQA: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_ICCV, author = {Tran, Duong T. and Tran, Trung-Kien and Hauswirth, Manfred and Le Phuoc, Danh}, title = {ReasonVQA: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18793-18803} }
HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yida and Zhang, Xueyang and Zhan, Kun and Jia, Peng and Lang, Xianpeng}, title = {HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25746-25755} }
Rethinking DPO-style Diffusion Aligning Frameworks-
[pdf]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Xun and Huang, Shaohan and Jiang, Lingjie and Wei, Furu}, title = {Rethinking DPO-style Diffusion Aligning Frameworks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18068-18077} }
VSC: Visual Search Compositional Text-to-Image Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Dat_2025_ICCV, author = {Dat, Do Huu and Hyeon-Woo, Nam and Mao, Po-Yuan and Oh, Tae-Hyun}, title = {VSC: Visual Search Compositional Text-to-Image Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19153-19162} }
FRET: Feature Redundancy Elimination for Test Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_ICCV, author = {You, Linjing and Lu, Jiabao and Huang, Xiayuan and Nie, Xiangli}, title = {FRET: Feature Redundancy Elimination for Test Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2120-2130} }
From One to More: Contextual Part Latents for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Shaocong and Ding, Lihe and Chen, Xiao and Li, Yaokun and Wang, Yuxin and Wang, Yucheng and Wang, Qi and Kim, Jaehyeok and Gao, Chenjian and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan and Xu, Dan}, title = {From One to More: Contextual Part Latents for 3D Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8230-8240} }
CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saito_2025_ICCV, author = {Saito, Kuniaki and Kim, Donghyun and Park, Kwanyong and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19872-19881} }
CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Zhuoyan and Wu, Yinghao and Cheng, Tianheng and Liu, Yong and Xiao, Yicheng and Wang, Hongfa and Zhang, Xiao-Ping and Yang, Yujiu}, title = {CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22685-22694} }
PersonalVideo: High ID-Fidelity Video Customization without Dynamic and Semantic Degradation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hengjia and Qiu, Haonan and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Li, Zekun and Zhang, Yingya and Wu, Boxi and Cai, Deng}, title = {PersonalVideo: High ID-Fidelity Video Customization without Dynamic and Semantic Degradation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19406-19416} }
MP-HSIR: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Zhehui and Chen, Yong and Yokoya, Naoto and He, Wei}, title = {MP-HSIR: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13009-13020} }
Learning A Unified Template for Gait Recognition-
[pdf]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Panjian and Hou, Saihui and Huang, Junzhou and Huang, Yongzhen}, title = {Learning A Unified Template for Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12459-12469} }
LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_ICCV, author = {Chang, Wei-Jer and Zhan, Wei and Tomizuka, Masayoshi and Chandraker, Manmohan and Pittaluga, Francesco}, title = {LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26622-26631} }
Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Daehee and Surana, Monu and Desai, Pranav and Mehta, Ashish and John, Reuben MV and Yoon, Kuk-Jin}, title = {Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27839-27850} }
REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yitian and Mai, Long and Mahapatra, Aniruddha and Bourgin, David and Hong, Yicong and Casebeer, Jonah and Liu, Feng and Fu, Yun}, title = {REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18453-18462} }
DADet: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Hongwei and Ding, Xinlong and Li, Jiawei and Wang, Jinlong and Zhang, Yudong and Wang, Rongquan and Ma, Huimin and Chen, Jiansheng}, title = {DADet: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17411-17421} }
CL-Splats: Continual Learning of Gaussian Splatting with Local Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Ackermann_2025_ICCV, author = {Ackermann, Jan and Kulhanek, Jonas and Cai, Shengqu and Xu, Haofei and Pollefeys, Marc and Wetzstein, Gordon and Guibas, Leonidas J. and Peng, Songyou}, title = {CL-Splats: Continual Learning of Gaussian Splatting with Local Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7808-7817} }
Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Zhongwei and Chao, Hanqing and Lin, Tiancheng and Chang, Wanxing and Yang, Zijiang and Jiao, Wenpei and Shen, Yixuan and Zhang, Yunshuo and Yang, Yelin and Liu, Wenbin and Jiang, Hui and Bian, Yun and Yan, Ke and Jin, Dakai and Lu, Le}, title = {Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22738-22747} }
Inpaint4Drag: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jingyi and Han, Kai}, title = {Inpaint4Drag: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18304-18313} }
Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_ICCV, author = {Tao, Zerui and Takida, Yuhta and Murata, Naoki and Zhao, Qibin and Mitsufuji, Yuki}, title = {Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16333-16344} }
PlaneRAS: Learning Planar Primitives for 3D Plane Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Fang and Zheng, Wenzhao and Zhao, Linqing and Zhu, Zelan and Lu, Jiwen and Zhou, Xiuzhuang}, title = {PlaneRAS: Learning Planar Primitives for 3D Plane Recovery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6882-6891} }
TARS: Traffic-Aware Radar Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jialong and Braun, Marco and Spata, Dominic and Rottmann, Matthias}, title = {TARS: Traffic-Aware Radar Scene Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26075-26084} }
Multi-Cache Enhanced Prototype Learning for Test-Time Generalization of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xinyu and Zhai, Haotian and Zhang, Can and Shi, Xiupeng and Li, Ruirui}, title = {Multi-Cache Enhanced Prototype Learning for Test-Time Generalization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2281-2291} }
MotionFollower: Editing Video Motion via Score-Guided Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2025_ICCV, author = {Tu, Shuyuan and Dai, Qi and Zhang, Zihao and Xie, Sicheng and Cheng, Zhi-Qi and Luo, Chong and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang}, title = {MotionFollower: Editing Video Motion via Score-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12822-12831} }
Learning Few-Step Diffusion Models by Trajectory Distribution Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Yihong and Hu, Tianyang and Sun, Jiacheng and Cai, Yujun and Tang, Jing}, title = {Learning Few-Step Diffusion Models by Trajectory Distribution Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17719-17728} }
Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion-
[pdf]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Songru and Shi, Zhenwei and Zou, Zhengxia}, title = {Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27563-27574} }
Certifiably Optimal Anisotropic Rotation Averaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Olsson_2025_ICCV, author = {Olsson, Carl and Lochman, Yaroslava and Malmport, Johan and Zach, Christopher}, title = {Certifiably Optimal Anisotropic Rotation Averaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14856-14865} }
TeRA: Rethinking Text-guided Realistic 3D Avatar Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yanwen and Zhuang, Yiyu and Zhang, Jiawei and Wang, Li and Zeng, Yifei and Cao, Xun and Zuo, Xinxin and Zhu, Hao}, title = {TeRA: Rethinking Text-guided Realistic 3D Avatar Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10686-10697} }
Understanding Flatness in Generative Models: Its Role and Benefits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Taehwan and Seo, Kyeongkook and Yoo, Jaejun and Yoon, Sung Whan}, title = {Understanding Flatness in Generative Models: Its Role and Benefits}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4908-4917} }
UniGlyph: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuanrui and Han, Cong and Li, Yafei and Jin, Zhipeng and Li, Xiawei and Du, SiNan and Tao, Wen and Li, Shuanglong and Yang, Yi and Yuan, Chun and Lin, Liu}, title = {UniGlyph: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18335-18344} }
DCHM: Depth-Consistent Human Modeling for Multiview Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Jiahao and Wang, Tianyu and Liu, Miaomiao and Ahmedt-Aristizabal, David and Nguyen, Chuong}, title = {DCHM: Depth-Consistent Human Modeling for Multiview Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7731-7740} }
ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Niu_2025_ICCV, author = {Niu, Ke and Yu, Haiyang and Zhao, Mengyang and Fu, Teng and Yi, Siyang and Lu, Wei and Li, Bin and Qian, Xuelin and Xue, Xiangyang}, title = {ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24245-24254} }
TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Ziyang and Liu, Nian and Yang, Xuguang and Khan, Salman and Anwer, Rao Muhammad and Cholakkal, Hisham and Khan, Fahad Shahbaz and Han, Junwei}, title = {TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24014-24023} }
ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Haoyu and Zhang, Diankun and Zhao, Zongchuang and Cui, Jianfeng and Liang, Dingkang and Zhang, Chong and Zhang, Dingyuan and Xie, Hongwei and Wang, Bing and Bai, Xiang}, title = {ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24823-24834} }
GeoExplorer: Active Geo-localization with Curiosity-Driven Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2025_ICCV, author = {Mi, Li and B{\'e}chaz, Manon and Chen, Zeming and Bosselut, Antoine and Tuia, Devis}, title = {GeoExplorer: Active Geo-localization with Curiosity-Driven Exploration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6122-6131} }
LaCoOT: Layer Collapse through Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Quetu_2025_ICCV, author = {Qu{\'e}tu, Victor and Liao, Zhu and Hezbri, Nour and Pizzati, Fabio and Tartaglione, Enzo}, title = {LaCoOT: Layer Collapse through Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20497-20507} }
Back on Track: Bundle Adjustment for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Weirong and Zhang, Ganlin and Wimbauer, Felix and Wang, Rui and Araslanov, Nikita and Vedaldi, Andrea and Cremers, Daniel}, title = {Back on Track: Bundle Adjustment for Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4951-4960} }
ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Tianming and Lin, Kun-Yu and Tan, Chaolei and Zhang, Jianguo and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20009-20019} }
LA-MOTR: End-to-End Multi-Object Tracking by Learnable Association-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Peng and Wang, Yongcai and Cao, Hualong and Chen, Wang and Li, Deying}, title = {LA-MOTR: End-to-End Multi-Object Tracking by Learnable Association}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12438-12448} }
NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Han-Hung and Han, Qinghong and Chang, Angel X.}, title = {NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26509-26518} }
NegRefine: Refining Negative Label-Based Zero-Shot OOD Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ansari_2025_ICCV, author = {Ansari, Amirhossein and Wang, Ke and Xiong, Pulei}, title = {NegRefine: Refining Negative Label-Based Zero-Shot OOD Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {573-582} }
DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2025_ICCV, author = {Sharma, Aashish}, title = {DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24569-24579} }
OmniVTON: Training-Free Universal Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhaotong and Li, Yuhui and He, Shengfeng and Li, Xinzhe and Xu, Yangyang and Dong, Junyu and Du, Yong}, title = {OmniVTON: Training-Free Universal Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16702-16711} }
ACAM-KD: Adaptive and Cooperative Attention Masking for Knowledge Distillation-
[pdf]
[bibtex]@InProceedings{Lan_2025_ICCV, author = {Lan, Qizhen and Tian, Qing}, title = {ACAM-KD: Adaptive and Cooperative Attention Masking for Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3957-3966} }
InfGen: A Resolution-Agnostic Paradigm for Scalable Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Tao and Xu, Wanghan and Gong, Junchao and Yue, Xiaoyu and Guo, Song and Zhou, Luping and Bai, Lei}, title = {InfGen: A Resolution-Agnostic Paradigm for Scalable Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17941-17950} }
PlanGen: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Runze and Cheng, Bo and Ma, Yuhang and Jia, Qingxiang and Liu, Shanyuan and Ma, Ao and Wu, Xiaoyu and Wu, Liebucha and Leng, Dawei and Yin, Yuhui}, title = {PlanGen: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18143-18154} }
AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Kai and Zou, Hao and Wang, Bochen and Xi, Ye and Xie, Zhen and Wang, Hao}, title = {AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23958-23967} }
Completing 3D Partial Assemblies with View-Consistent 2D-3D Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Weihao and Lan, Yu and You, Mingyu and He, Bin}, title = {Completing 3D Partial Assemblies with View-Consistent 2D-3D Correspondence}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7741-7750} }
Latte: Collaborative Test-Time Adaptation of Vision-Language Models in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Wenxuan and Deng, Ruxi and Qiu, Ruizhong and Wei, Tianxin and Tong, Hanghang and He, Jingrui}, title = {Latte: Collaborative Test-Time Adaptation of Vision-Language Models in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {133-143} }
SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shuaiting and Deng, Juncan and Wang, Chengxuan and Xu, Kedong and Deng, Rongtao and Gu, Hong and Shen, Haibin and Huang, Kejie}, title = {SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23710-23719} }
From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations-
[pdf]
[supp]
[bibtex]@InProceedings{Bisulco_2025_ICCV, author = {Bisulco, Anthony and Ramesh, Rahul and Balestriero, Randall and Chaudhari, Pratik}, title = {From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16441-16450} }
SliderSpace: Decomposing the Visual Capabilities of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gandikota_2025_ICCV, author = {Gandikota, Rohit and Wu, Zongze and Zhang, Richard and Bau, David and Shechtman, Eli and Kolkin, Nick}, title = {SliderSpace: Decomposing the Visual Capabilities of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15994-16003} }
TrustMark: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images-
[pdf]
[supp]
[bibtex]@InProceedings{Bui_2025_ICCV, author = {Bui, Tu and Agarwal, Shruti and Collomosse, John}, title = {TrustMark: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18629-18639} }
TikZero: Zero-Shot Text-Guided Graphics Program Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Belouadi_2025_ICCV, author = {Belouadi, Jonas and Ilg, Eddy and Keuper, Margret and Tanaka, Hideki and Utiyama, Masao and Dabre, Raj and Eger, Steffen and Ponzetto, Simone}, title = {TikZero: Zero-Shot Text-Guided Graphics Program Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17793-17806} }
DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yuntao and Wang, Yuqi and Zhang, Zhaoxiang}, title = {DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26890-26900} }
One-Shot Knowledge Transfer for Scalable Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Longhua and Qi, Lei and Geng, Xin}, title = {One-Shot Knowledge Transfer for Scalable Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {668-677} }
GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Kang and Liang, Zhihao and Shen, Yulin and Wang, Zeyu}, title = {GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26220-26229} }
FreeScale: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Haonan and Zhang, Shiwei and Wei, Yujie and Chu, Ruihang and Yuan, Hangjie and Wang, Xiang and Zhang, Yingya and Liu, Ziwei}, title = {FreeScale: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16893-16903} }
TokenUnify: Scaling Up Autoregressive Pretraining for Neuron Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yinda and Shi, Haoyuan and Liu, Xiaoyu and Shi, Te and Zhang, Ruobing and Liu, Dong and Xiong, Zhiwei and Wu, Feng}, title = {TokenUnify: Scaling Up Autoregressive Pretraining for Neuron Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13604-13613} }
SAS: Segment Any 3D Scene with Integrated 2D Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhuoyuan and Lu, Jiahao and Deng, Jiacheng and Chang, Hanzhi and Wu, Lifan and Liang, Yanzhe and Zhang, Tianzhu}, title = {SAS: Segment Any 3D Scene with Integrated 2D Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8306-8318} }
OMNI-DC: Highly Robust Depth Completion with Multiresolution Depth Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2025_ICCV, author = {Zuo, Yiming and Yang, Willow and Ma, Zeyu and Deng, Jia}, title = {OMNI-DC: Highly Robust Depth Completion with Multiresolution Depth Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9287-9297} }
4D Gaussian Splatting SLAM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yanyan and Fang, Youxu and Zhu, Zunjie and Li, Kunyi and Ding, Yong and Tombari, Federico}, title = {4D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25019-25028} }
PHD: Personalized 3D Human Body Fitting with Point Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2025_ICCV, author = {Ho, Hsuan-I and Guo, Chen and Wu, Po-Chen and Shugurov, Ivan and Tang, Chengcheng and Mittal, Abhay and An, Sizhe and Kaufmann, Manuel and Zhang, Linguang}, title = {PHD: Personalized 3D Human Body Fitting with Point Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7526-7537} }
SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Byeongjun and Go, Hyojun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick}, title = {SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27326-27337} }
Hierarchical Material Recognition from Local Appearance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beveridge_2025_ICCV, author = {Beveridge, Matthew and Nayar, Shree K.}, title = {Hierarchical Material Recognition from Local Appearance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8165-8176} }
Unleashing the Temporal Potential of Stereo Event Cameras for Continuous-Time 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Jae-Young and Cho, Hoonhee and Yoon, Kuk-Jin}, title = {Unleashing the Temporal Potential of Stereo Event Cameras for Continuous-Time 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6869-6881} }
SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Zhentao and Xue, Ben and Jia, Jian and Wang, Junhao and Ye, Wencai and Shi, Shaoyun and Sun, Mingjie and Wu, Wenjin and Chen, Quan and Jiang, Peng}, title = {SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23541-23550} }
D-Attn: Decomposed Attention for Large Vision-and-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Kuo_2025_ICCV, author = {Kuo, Chia-Wen and Zhu, Sijie and Chen, Fan and Shen, Xiaohui and Wen, Longyin}, title = {D-Attn: Decomposed Attention for Large Vision-and-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23935-23944} }
R1-VL: Learning to Reason with Multimodal Large Language Models via Step-wise Group Relative Policy Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jingyi and Huang, Jiaxing and Yao, Huanjin and Liu, Shunyu and Zhang, Xikun and Lu, Shijian and Tao, Dacheng}, title = {R1-VL: Learning to Reason with Multimodal Large Language Models via Step-wise Group Relative Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1859-1869} }
Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hongjun and Chen, Jiyuan and Yin, Zhengwei and Song, Xuan and Zheng, Yinqiang}, title = {Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14152-14161} }
Rep-MTL: Unleashing the Power of Representation-level Task Saliency for Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zedong and Li, Siyuan and Xu, Dan}, title = {Rep-MTL: Unleashing the Power of Representation-level Task Saliency for Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3413-3423} }
TLB-VFI: Temporal-Aware Latent Brownian Bridge Diffusion for Video Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2025_ICCV, author = {Lyu, Zonglin and Chen, Chen}, title = {TLB-VFI: Temporal-Aware Latent Brownian Bridge Diffusion for Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16260-16269} }
Gaussian Splatting with Discretized SDF for Relightable Assets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Zuo-Liang and Yang, Jian and Wang, Beibei}, title = {Gaussian Splatting with Discretized SDF for Relightable Assets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25155-25164} }
Region-based Cluster Discrimination for Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Yin and Yang, Kaicheng and An, Xiang and Wu, Kun and Zhao, Yongle and Deng, Weimo and Ran, Zimin and Wang, Yumeng and Feng, Ziyong and Miles, Roy and Elezi, Ismail and Deng, Jiankang}, title = {Region-based Cluster Discrimination for Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1793-1803} }
AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Steiner_2025_ICCV, author = {Steiner, Michael and K\"ohler, Thomas and Radl, Lukas and Windisch, Felix and Schmalstieg, Dieter and Steinberger, Markus}, title = {AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27650-27659} }
EvRT-DETR: Latent Space Adaptation of Image Detectors for Event-based Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Torbunov_2025_ICCV, author = {Torbunov, Dmitrii and Ren, Yihui and Ghose, Animesh and Dim, Odera and Cui, Yonggang}, title = {EvRT-DETR: Latent Space Adaptation of Image Detectors for Event-based Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9812-9821} }
MeshPad: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Haoxuan and Erko\c{c}, Ziya and Li, Lei and Sirigatti, Daniele and Rosov, Vladislav and Dai, Angela and Nie{\ss}ner, Matthias}, title = {MeshPad: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16227-16237} }
ResidualViT for Efficient Temporally Dense Video Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soldan_2025_ICCV, author = {Soldan, Mattia and Heilbron, Fabian Caba and Ghanem, Bernard and Sivic, Josef and Russell, Bryan}, title = {ResidualViT for Efficient Temporally Dense Video Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22305-22315} }
UniCombine: Unified Multi-Conditional Combination with Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haoxuan and Peng, Jinlong and He, Qingdong and Yang, Hao and Jin, Ying and Wu, Jiafu and Hu, Xiaobin and Pan, Yanjie and Gan, Zhenye and Chi, Mingmin and Peng, Bo and Wang, Yabiao}, title = {UniCombine: Unified Multi-Conditional Combination with Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18325-18334} }
Easi3R: Estimating Disentangled Motion from DUSt3R Without Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xingyu and Chen, Yue and Xiu, Yuliang and Geiger, Andreas and Chen, Anpei}, title = {Easi3R: Estimating Disentangled Motion from DUSt3R Without Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9158-9168} }
Addressing Text Embedding Leakage in Diffusion-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mun_2025_ICCV, author = {Mun, Sunung and Nam, Jinhwan and Cho, Sunghyun and Ok, Jungseul}, title = {Addressing Text Embedding Leakage in Diffusion-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16451-16460} }
CaO2: Rectifying Inconsistencies in Diffusion-Based Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haoxuan and Zhao, Zhenghao and Wu, Junyi and Shang, Yuzhang and Liu, Gaowen and Yan, Yan}, title = {CaO2: Rectifying Inconsistencies in Diffusion-Based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4722-4731} }
HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Zhixiang and Wang, Guangting and Ma, Xiaoxiao and Mei, Ke and Chen, Huaian and Jin, Yi and Rao, Fengyun}, title = {HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22447-22456} }
Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Chunxiao and Wang, Xiaoxiao and Li, Meiling and Miao, Boming and Sun, Peng and Zhang, Yunjian and Ji, Xiangyang and Zhu, Yao}, title = {Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20379-20389} }
Progressive Test Time Energy Adaptation for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaoran and Hong, Byung-Woo and Park, Hyoungseob and Pak, Daniel H. and Rickmann, Anne-Marie and Staib, Lawrence H. and Duncan, James S. and Wong, Alex}, title = {Progressive Test Time Energy Adaptation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22338-22348} }
Sequential Gaussian Avatars with Hierarchical Motion Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Wangze and Zhan, Yifan and Zhong, Zhihang and Sun, Xiao}, title = {Sequential Gaussian Avatars with Hierarchical Motion Context}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13592-13603} }
Beyond Losses Reweighting: Empowering Multi-Task Learning via the Generalization Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phan_2025_ICCV, author = {Phan, Hoang and Tran, Lam and Tran, Quyen and Tran, Ngoc and Truong, Tuan and Lei, Qi and Ho, Nhat and Phung, Dinh and Le, Trung}, title = {Beyond Losses Reweighting: Empowering Multi-Task Learning via the Generalization Perspective}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2440-2450} }
Zero-shot Inexact CAD Model Alignment from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Arsomngern_2025_ICCV, author = {Arsomngern, Pattaramanee and Khwanmuang, Sasikarn and Nie{\ss}ner, Matthias and Suwajanakorn, Supasorn}, title = {Zero-shot Inexact CAD Model Alignment from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6231-6241} }
Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_ICCV, author = {Nguyen, Dat and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila}, title = {Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10786-10796} }
Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Kangle and Liu, Hsueh-Ti Derek and Zhu, Yiheng and Sun, Xiaoxia and Shang, Chong and Bhat, Kiran S. and Ramanan, Deva and Zhu, Jun-Yan and Agrawala, Maneesh and Zhou, Tinghui}, title = {Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11685-11696} }
Cycle-Consistent Learning for Joint Layout-to-Image Generation and Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Xinhao and Lai, Qiuxia and Pei, Gensheng and Shu, Xiangbo and Yao, Yazhou and Wang, Wenguan}, title = {Cycle-Consistent Learning for Joint Layout-to-Image Generation and Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6797-6807} }
MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Langyu and Zhu, Bingke and Chen, Yingying and Zhang, Yiyuan and Tang, Ming and Wang, Jinqiao}, title = {MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20637-20646} }
LIFT: Latent Implicit Functions for Task- and Data-Agnostic Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{Kazerouni_2025_ICCV, author = {Kazerouni, Amirhossein and Mehraban, Soroush and Brudno, Michael and Taati, Babak}, title = {LIFT: Latent Implicit Functions for Task- and Data-Agnostic Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4828-4837} }
Long-Context State-Space Video World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Po_2025_ICCV, author = {Po, Ryan and Nitzan, Yotam and Zhang, Richard and Chen, Berlin and Dao, Tri and Shechtman, Eli and Wetzstein, Gordon and Huang, Xun}, title = {Long-Context State-Space Video World Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8733-8744} }
Dataset Distillation via the Wasserstein Metric-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Haoyang and Li, Yijiang and Xing, Tiancheng and Wang, Peiran and Dalal, Vibhu and Li, Luwei and He, Jingrui and Wang, Haohan}, title = {Dataset Distillation via the Wasserstein Metric}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1205-1215} }
IRGPT: Understanding Real-world Infrared Image with Bi-cross-modal Curriculum on Large-scale Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Zhe and Zhang, Jin and Zhang, Ruiheng}, title = {IRGPT: Understanding Real-world Infrared Image with Bi-cross-modal Curriculum on Large-scale Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {166-176} }
Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Katie Z and Dao, Minh-Quan and Liu, Zhenzhen and Campbell, Mark and Chao, Wei-Lun and Weinberger, Kilian Q and Malis, Ezio and Fremont, Vincent and Hariharan, Bharath and Shan, Mao and Worrall, Stewart and Perez, Julie Stephany Berrio}, title = {Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28763-28773} }
Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions-
[pdf]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Yuwen and Sun, Rui and Li, Wangkai and Zhang, Tianzhu}, title = {Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13952-13962} }
OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Caoshuo and Ding, Zengmao and Hu, Xiaobin and Li, Bang and Luo, Donghao and Wu, AndyPian and Wang, Chaoyang and Wang, Chengjie and Jin, Taisong and Shu, Seven and Wu, Yunsheng and Liu, Yongge and Ji, Rongrong}, title = {OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19893-19902} }
Streaming VideoLLMs for Real-Time Procedural Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chatterjee_2025_ICCV, author = {Chatterjee, Dibyadip and Remelli, Edoardo and Song, Yale and Tekin, Bugra and Mittal, Abhay and Bhatnagar, Bharat and Camgoz, Necati Cihan and Hampali, Shreyas and Sauser, Eric and Ma, Shugao and Yao, Angela and Sener, Fadime}, title = {Streaming VideoLLMs for Real-Time Procedural Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22586-22598} }
Efficient Visual Place Recognition Through Multimodal Semantic Knowledge Integration-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Sitao and Mao, Hongda and Chen, Qingshuang and Kim, Yelin}, title = {Efficient Visual Place Recognition Through Multimodal Semantic Knowledge Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5601-5610} }
Rethinking Multi-modal Object Detection from the Perspective of Mono-Modality Feature Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Tianyi and Liu, Boyang and Gao, Yanglei and Sun, Yiming and Yuan, Maoxun and Wei, Xingxing}, title = {Rethinking Multi-modal Object Detection from the Perspective of Mono-Modality Feature Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6364-6373} }
VEGGIE: Instructional Editing and Reasoning Video Concepts with Grounded Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Shoubin and Liu, Difan and Ma, Ziqiao and Hong, Yicong and Zhou, Yang and Tan, Hao and Chai, Joyce and Bansal, Mohit}, title = {VEGGIE: Instructional Editing and Reasoning Video Concepts with Grounded Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15147-15158} }
Task-Aware Prompt Gradient Projection for Parameter-Efficient Tuning Federated Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2025_ICCV, author = {Ke, Hualong and Shi, Jiangming and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun}, title = {Task-Aware Prompt Gradient Projection for Parameter-Efficient Tuning Federated Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2631-2641} }
Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Sitong and Tan, Haoru and Chen, Yukang and Zhang, Shaofeng and Li, Jingyao and Yu, Bei and Qi, Xiaojuan and Jia, Jiaya}, title = {Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24603-24614} }
DMesh++: An Efficient Differentiable Mesh for Complex Shapes-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Sanghyun and Gadelha, Matheus and Zhou, Yang and Fisher, Matthew and Xu, Zexiang and Qiao, Yi-Ling and Lin, Ming C. and Zhou, Yi}, title = {DMesh++: An Efficient Differentiable Mesh for Complex Shapes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26590-26599} }
From Imitation to Innovation: The Emergence of AI's Unique Artistic Styles and the Challenge of Copyright Protection-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Deng, Ying and Yuan, Zhiqiang and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie}, title = {From Imitation to Innovation: The Emergence of AI's Unique Artistic Styles and the Challenge of Copyright Protection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18980-18989} }
VPO: Aligning Text-to-Video Generation Models with Prompt Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Jiale and Lyu, Ruiliang and Gu, Xiaotao and Liu, Xiao and Xu, Jiazheng and Lu, Yida and Teng, Jiayan and Yang, Zhuoyi and Dong, Yuxiao and Tang, Jie and Wang, Hongning and Huang, Minlie}, title = {VPO: Aligning Text-to-Video Generation Models with Prompt Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15636-15645} }
Intervening in Black Box: Concept Bottleneck Model for Enhancing Human Neural Network Mutual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2025_ICCV, author = {Xiong, Nuoye and Dong, Anqi and Wang, Ning and Hua, Cong and Zhu, Guangming and Mei, Lin and Shen, Peiyi and Zhang, Liang}, title = {Intervening in Black Box: Concept Bottleneck Model for Enhancing Human Neural Network Mutual Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2836-2845} }
Versatile Transition Generation with Image-to-Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zuhao and Zhang, Jiahui and Yu, Yingchen and Lu, Shijian and Bai, Song}, title = {Versatile Transition Generation with Image-to-Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16981-16990} }
SMGDiff: Soccer Motion Generation using Diffusion Probabilistic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Hongdi and Li, Chengyang and Wu, Zhenxuan and Li, Gaozheng and Wang, Jingya and Yu, Jingyi and Su, Zhuo and Xu, Lan}, title = {SMGDiff: Soccer Motion Generation using Diffusion Probabilistic Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11807-11817} }
Extrapolated Urban View Synthesis Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Xiangyu and Jia, Zhen and Li, Boyi and Wang, Yan and Ivanovic, Boris and You, Yurong and Liu, Lingjie and Wang, Yue and Pavone, Marco and Feng, Chen and Li, Yiming}, title = {Extrapolated Urban View Synthesis Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28718-28728} }
Emulating Self-attention with Convolution for Efficient Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Dongheon and Yun, Seokju and Ro, Youngmin}, title = {Emulating Self-attention with Convolution for Efficient Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24467-24477} }
CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Rui and Liang, Chenwei and Xia, Yan and Zimmer, Walter and Cao, Hu and Caesar, Holger and Festag, Andreas and Knoll, Alois}, title = {CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28031-28041} }
RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paliwal_2025_ICCV, author = {Paliwal, Avinash and Zhou, Xilong and Ye, Wei and Xiong, Jinhui and Ranjan, Rakesh and Kalantari, Nima Khademi}, title = {RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25094-25103} }
Text Embedding Knows How to Quantize Text-Guided Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Hongjae and Son, Myungjun and Kang, Dongjea and Jung, Seung-Won}, title = {Text Embedding Knows How to Quantize Text-Guided Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15426-15436} }
UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Jin and Wu, Hongrui and Feng, Ziyong and Bao, Hujun and Zhou, Xiaowei and Peng, Sida}, title = {UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27031-27041} }
Shape of Motion: 4D Reconstruction from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Qianqian and Ye, Vickie and Gao, Hang and Zeng, Weijia and Austin, Jake and Li, Zhengqi and Kanazawa, Angjoo}, title = {Shape of Motion: 4D Reconstruction from a Single Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9660-9672} }
From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Chenjian and Ding, Lihe and Han, Rui and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan}, title = {From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25712-25721} }
Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, In and Yoo, Youngbeom and Jeon, Subin and Kim, Seon Joo}, title = {Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28556-28566} }
Backdoor Defense via Enhanced Splitting and Trap Isolation-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Hongrui and Qi, Lu and Lin, Wanyu and Chen, Jian and Sun, Hailong and Sun, Chengbin}, title = {Backdoor Defense via Enhanced Splitting and Trap Isolation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1708-1717} }
FedVLA: Federated Vision-Language-Action Learning with Dual Gating Mixture-of-Experts for Robotic Manipulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV, author = {Miao, Cui and Chang, Tao and Wu, Meihan and Xu, Hongbin and Li, Chun and Li, Ming and Wang, Xiaodong}, title = {FedVLA: Federated Vision-Language-Action Learning with Dual Gating Mixture-of-Experts for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6904-6913} }
BokehDiff: Neural Lens Blur with One-Step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Chengxuan and Fan, Qingnan and Zhang, Qi and Chen, Jinwei and Zhang, Huaqi and Xu, Chao and Shi, Boxin}, title = {BokehDiff: Neural Lens Blur with One-Step Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9508-9518} }
CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sick_2025_ICCV, author = {Sick, Leon and Engel, Dominik and Hartwig, Sebastian and Hermosilla, Pedro and Ropinski, Timo}, title = {CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21265-21275} }
Perspective-Aware Reasoning in Vision-Language Models via Mental Imagery Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Phillip Y. and Je, Jihyeon and Park, Chanho and Uy, Mikaela Angelina and Guibas, Leonidas and Sung, Minhyuk}, title = {Perspective-Aware Reasoning in Vision-Language Models via Mental Imagery Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9241-9251} }
Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Lei and Huang, Junjie and Di, Donglin and Su, Anyang and Song, Tianyou and Pagnucco, Maurice and Song, Yang}, title = {Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21419-21428} }
GUAVA: Generalizable Upper Body 3D Gaussian Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Li, Yang and Qin, Minghan and Li, Yu and Wang, Haoqian}, title = {GUAVA: Generalizable Upper Body 3D Gaussian Avatar}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14205-14217} }
PASG: A Closed-Loop Framework for Automated Geometric Primitive Extraction and Semantic Anchoring in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Zhihao and Zheng, Yifan and Pan, Siyu and Jin, Yaohui and Mu, Yao}, title = {PASG: A Closed-Loop Framework for Automated Geometric Primitive Extraction and Semantic Anchoring in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8950-8960} }
FastVAR: Linear Visual Autoregressive Modeling via Cached Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Hang and Li, Yawei and Zhang, Taolin and Wang, Jiangshan and Dai, Tao and Xia, Shu-Tao and Benini, Luca}, title = {FastVAR: Linear Visual Autoregressive Modeling via Cached Token Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19011-19021} }
Online Generic Event Boundary Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Hyungrok and Kim, Daneul and Lim, Seunggyun and Son, Jeany and Choi, Jonghyun}, title = {Online Generic Event Boundary Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13741-13750} }
Registration beyond Points: General Affine Subspace Alignment via Geodesic Distance on Grassmann Manifold-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Jaeho and Gil, Hyeonjae and Jang, Junwoo and Ghaffari, Maani and Kim, Ayoung}, title = {Registration beyond Points: General Affine Subspace Alignment via Geodesic Distance on Grassmann Manifold}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3767-3776} }
Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Enyu and Yu, En and Chen, Sijia and Tao, Wenbing}, title = {Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26999-27009} }
World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Yupeng and Yang, Pengxuan and Xing, Zebin and Zhang, Qichao and Zheng, Yuhang and Gao, Yinfeng and Li, Pengfei and Zhang, Teng and Xia, Zhongpu and Jia, Peng and Lang, XianPeng and Zhao, Dongbin}, title = {World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28632-28642} }
Online Language Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Katragadda_2025_ICCV, author = {Katragadda, Saimouli and Wu, Cho-Ying and Guo, Yuliang and Huang, Xinyu and Huang, Guoquan and Ren, Liu}, title = {Online Language Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25882-25892} }
Amodal Depth Anything: Amodal Depth Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Bhat, Shariq Farooq and Wonka, Peter}, title = {Amodal Depth Anything: Amodal Depth Estimation in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9673-9682} }
Hierarchical Variational Test-Time Prompt Generation for Zero-Shot Generalization-
[pdf]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Zhaoyang and Liu, Fang and Jiao, Licheng and Li, Shuo and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping}, title = {Hierarchical Variational Test-Time Prompt Generation for Zero-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2325-2335} }
VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jinglei and Guo, Yuanfan and Potamias, Rolandos Alexandros and Deng, Jiankang and Xu, Hang and Ma, Chao}, title = {VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24203-24213} }
RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wanting and Ding, Zhenhui and Chen, Guilian and Wu, Huisi and Qin, Jing}, title = {RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21689-21698} }
Factorized Learning for Temporally Grounded Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Wenzheng and Gao, Difei and Shou, Mike Zheng and Ng, Hwee Tou}, title = {Factorized Learning for Temporally Grounded Video-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20683-20693} }
Decouple to Reconstruct: High Quality UHD Restoration via Active Feature Disentanglement and Reversible Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yidi and Li, Dong and Ma, Yuxin and Huang, Jie and Zhang, Wenlong and Fu, Xueyang and Zha, Zheng-Jun}, title = {Decouple to Reconstruct: High Quality UHD Restoration via Active Feature Disentanglement and Reversible Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11622-11631} }
Generating Multi-Image Synthetic Data for Text-to-Image Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kumari_2025_ICCV, author = {Kumari, Nupur and Yin, Xi and Zhu, Jun-Yan and Misra, Ishan and Azadi, Samaneh}, title = {Generating Multi-Image Synthetic Data for Text-to-Image Customization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16524-16534} }
WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiajia and Wu, Huisi and Qin, Jing}, title = {WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21984-21993} }
Towards Long-Horizon Vision-Language-Action System: Reasoning, Acting and Memory-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Daixun and Zhang, Yusi and Cao, Mingxiang and Liu, Donglai and Xie, Weiying and Hui, Tianlin and Lin, Lunkai and Xie, Zhiqiang and Li, Yunsong}, title = {Towards Long-Horizon Vision-Language-Action System: Reasoning, Acting and Memory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6839-6848} }
MR-FIQA: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ou_2025_ICCV, author = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam}, title = {MR-FIQA: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12915-12925} }
IDFace: Face Template Protection for Efficient and Secure Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Sunpill and Paik, Seunghun and Hwang, Chanwoo and Kim, Dongsoo and Shin, Junbum and Seo, Jae Hong}, title = {IDFace: Face Template Protection for Efficient and Secure Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13995-14005} }
A Real-world Display Inverse Rendering Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_ICCV, author = {Choi, Seokjun and Chung, Hoon-Gyu and Jeon, Yujin and Nam, Giljoo and Baek, Seung-Hwan}, title = {A Real-world Display Inverse Rendering Dataset}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25272-25283} }
Lyra: An Efficient and Speech-Centric Framework for Omni-Cognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Zhisheng and Wang, Chengyao and Liu, Yuqi and Yang, Senqiao and Tang, Longxiang and Zhang, Yuechen and Li, Jingyao and Qu, Tianyuan and Li, Yanwei and Chen, Yukang and Yu, Shaozuo and Wu, Sitong and Lo, Eric and Liu, Shu and Jia, Jiaya}, title = {Lyra: An Efficient and Speech-Centric Framework for Omni-Cognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3694-3704} }
MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition-
[pdf]
[bibtex]@InProceedings{Golyadkin_2025_ICCV, author = {Golyadkin, Maksim and Rubanova, Valeria and Utkov, Aleksandr and Nikolotov, Dmitry and Makarov, Ilya}, title = {MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24488-24496} }
D2ST-Adapter: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Pei_2025_ICCV, author = {Pei, Wenjie and Tan, Qizhong and Lu, Guangming and Tian, Jiandong and Yu, Jun}, title = {D2ST-Adapter: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11317-11326} }
Synchronization of Multiple Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Naaman_2025_ICCV, author = {Naaman, Avihai and Weber, Ron Shapira and Freifeld, Oren}, title = {Synchronization of Multiple Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12514-12523} }
Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Binjian and Zhang, Pengju and Wei, Hao and Wu, Yihong}, title = {Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28664-28673} }
MotionAgent: Fine-grained Controllable Video Generation via Motion Field Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Xinyao and Zeng, Xianfang and Wang, Liao and Yu, Gang and Lin, Guosheng and Zhang, Chi}, title = {MotionAgent: Fine-grained Controllable Video Generation via Motion Field Agent}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11305-11316} }
The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Weixian and Wang, Jiacong and Wang, Haochen and Li, Xiangtai and Liew, Jun Hao and Feng, Jiashi and Huang, Zilong}, title = {The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20758-20769} }
PacGDC: Label-Efficient Generalizable Depth Completion with Projection Ambiguity and Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haotian and Xiao, Aoran and Zhang, Xiaoqin and Yang, Meng and Lu, Shijian}, title = {PacGDC: Label-Efficient Generalizable Depth Completion with Projection Ambiguity and Consistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7709-7720} }
GSV3D: Gaussian Splatting-based Geometric Distillation with Stable Video Diffusion for Single-Image 3D Object Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2025_ICCV, author = {Tao, Ye and Zhang, Jiawei and Shi, Yahao and Zou, Dongqing and Zhou, Bin}, title = {GSV3D: Gaussian Splatting-based Geometric Distillation with Stable Video Diffusion for Single-Image 3D Object Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7751-7760} }
OpenAnimals: Revisiting Person Re-Identification for Animals Towards Better Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_ICCV, author = {Hou, Saihui and Huang, Panjian and Wang, Zengbin and Liu, Yuan and Li, Zeyu and Zhang, Man and Huang, Yongzhen}, title = {OpenAnimals: Revisiting Person Re-Identification for Animals Towards Better Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14369-14379} }
VSRM: A Robust Mamba-Based Framework for Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_ICCV, author = {Tran, Dinh Phu and Hung, Dao Duy and Kim, Daeyoung}, title = {VSRM: A Robust Mamba-Based Framework for Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14711-14721} }
End-to-End Multi-Modal Diffusion Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Chunhao and Lu, Qiang and Dong, Meichen and Luo, Jake}, title = {End-to-End Multi-Modal Diffusion Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20529-20540} }
Guiding Diffusion-Based Articulated Object Generation by Partial Point Cloud Alignment and Physical Plausibility Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kreber_2025_ICCV, author = {Kreber, Jens U. and Stueckler, Joerg}, title = {Guiding Diffusion-Based Articulated Object Generation by Partial Point Cloud Alignment and Physical Plausibility Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3206-3214} }
PoseSyn: Synthesizing Diverse 3D Pose Data from In-the-Wild 2D Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, ChangHee and Song, Hyeonseop and Choi, Seokhun and Lee, Seungwoo and Kim, Jaechul and Do, Hoseok}, title = {PoseSyn: Synthesizing Diverse 3D Pose Data from In-the-Wild 2D Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5611-5621} }
Backdoor Attacks on Neural Networks via One-Bit Flip-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xiang and Luo, Lannan and Zeng, Qiang}, title = {Backdoor Attacks on Neural Networks via One-Bit Flip}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4328-4338} }
Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Yuting and Li, Shuo}, title = {Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19827-19837} }
Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yanqi and Niu, Jianwei and Ren, Tao}, title = {Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22110-22119} }
EEdit : Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Zexuan and Ma, Yue and Zou, Chang and Chen, Wenteng and Chen, Qifeng and Zhang, Linfeng}, title = {EEdit : Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17474-17484} }
Enhancing Adversarial Transferability by Balancing Exploration and Exploitation with Gradient-Guided Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Niu_2025_ICCV, author = {Niu, Zenghao and Xie, Weicheng and Song, Siyang and Yu, Zitong and Liu, Feng and Shen, Linlin}, title = {Enhancing Adversarial Transferability by Balancing Exploration and Exploitation with Gradient-Guided Sampling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3885-3894} }
SuperEdit: Rectifying and Facilitating Supervision for Instruction-Based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ming and Gu, Xin and Chen, Fan and Xing, Xiaoying and Wen, Longyin and Chen, Chen and Zhu, Sijie}, title = {SuperEdit: Rectifying and Facilitating Supervision for Instruction-Based Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19206-19215} }
HPSv3: Towards Wide-Spectrum Human Preference Score-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Yuhang and Wu, Xiaoshi and Sun, Keqiang and Li, Hongsheng}, title = {HPSv3: Towards Wide-Spectrum Human Preference Score}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15086-15095} }
NullSwap: Proactive Identity Cloaking Against Deepfake Face Swapping-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Tianyi and Niu, Shuaicheng and Cheng, Harry and Zhang, Xiao and Wang, Yinglong}, title = {NullSwap: Proactive Identity Cloaking Against Deepfake Face Swapping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9945-9954} }
SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jongsuk and Lee, Jaeyoung and Han, Gyojin and Lee, Dong-Jae and Jeong, Minki and Kim, Junmo}, title = {SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25197-25206} }
Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging-
[pdf]
[supp]
[bibtex]@InProceedings{Yazdanpanah_2025_ICCV, author = {Yazdanpanah, Moslem and Bahri, Ali and Noori, Mehrdad and Dastani, Sahar and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Ben Ayed, Ismail and Desrosiers, Christian}, title = {Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27640-27649} }
External Knowledge Injection for CLIP-Based Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Da-Wei and Li, Kai-Wen and Ning, Jingyi and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan}, title = {External Knowledge Injection for CLIP-Based Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3314-3325} }
GeoProg3D: Compositional Visual Reasoning for City-Scale 3D Language Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yasuki_2025_ICCV, author = {Yasuki, Shunsuke and Miyanishi, Taiki and Inoue, Nakamasa and Kurita, Shuhei and Sakamoto, Koya and Azuma, Daichi and Taki, Masato and Matsuo, Yutaka}, title = {GeoProg3D: Compositional Visual Reasoning for City-Scale 3D Language Fields}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9737-9748} }
Geometry Distributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Biao and Ren, Jing and Wonka, Peter}, title = {Geometry Distributions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1495-1505} }
PVChat: Personalized Video Chat with One-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Yufei and Yan, Weilong and Xu, Gang and Li, Yumeng and Chen, Yucheng and Li, Zhenxi and Yu, Fei and Li, Ming and Yeo, Si Yong}, title = {PVChat: Personalized Video Chat with One-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23321-23331} }
VertexRegen: Mesh Generation with Continuous Level of Detail-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiang and Siddiqui, Yawar and Avetisyan, Armen and Xie, Chris and Engel, Jakob and Howard-Jenkins, Henry}, title = {VertexRegen: Mesh Generation with Continuous Level of Detail}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12570-12580} }
Low-Light Image Enhancement Using Event-Based Illumination Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Lei and Bao, Yuhan and Zhai, Jiajun and Liang, Jingyun and Zhang, Yulun and Wang, Kaiwei and Paudel, Danda Pani and Van Gool, Luc}, title = {Low-Light Image Enhancement Using Event-Based Illumination Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6667-6677} }
FontAnimate: High Quality Few-shot Font Generation via Animating Font Transfer Process-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Bin and Wang, Zixuan and Yan, Kainan and Zhao, Shitian and Qin, Qi and Wen, Jie and He, Junjun and Gao, Peng}, title = {FontAnimate: High Quality Few-shot Font Generation via Animating Font Transfer Process}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16015-16025} }
A Good Teacher Adapts Their Knowledge for Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_ICCV, author = {Qian, Chengyao and Le, Trung and Harandi, Mehrtash}, title = {A Good Teacher Adapts Their Knowledge for Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1239-1248} }
GenFlowRL: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Kelin and Zhang, Sheng and Soora, Harshit and Huang, Furong and Huang, Heng and Tokekar, Pratap and Gao, Ruohan}, title = {GenFlowRL: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13183-13192} }
MAVFlow: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot AV2AV Multilingual Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, Sungwoo and Choi, Jeongsoo and Kim, Sungnyun and Yun, Se-Young}, title = {MAVFlow: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot AV2AV Multilingual Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13151-13161} }
Video Color Grading via Look-Up Table Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Seunghyun and Shin, Dongmin and Shin, Jisu and Jeon, Hae-Gon and Lee, Joon-Young}, title = {Video Color Grading via Look-Up Table Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19141-19152} }
NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yang and Mao, Dongni and Santo, Hiroaki and Matsushita, Yasuyuki and Okura, Fumio}, title = {NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28167-28176} }
DC-AR: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yecheng and Cai, Han and Chen, Junyu and Zhang, Zhuoyang and Xie, Enze and Yu, Jincheng and Chen, Junsong and Hu, Jinyi and Lu, Yao and Han, Song}, title = {DC-AR: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18034-18045} }
BVINet: Unlocking Blind Video Inpainting with Zero Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Zhiliang and Chen, Kerui and Li, Kun and Fan, Hehe and Yang, Yi}, title = {BVINet: Unlocking Blind Video Inpainting with Zero Annotations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14017-14027} }
Semi-supervised Deep Transfer for Regression without Domain Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Biswas_2025_ICCV, author = {Biswas, Mainak and Dukkipati, Ambedkar and Sridharan, Devarajan}, title = {Semi-supervised Deep Transfer for Regression without Domain Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {827-836} }
Evading Data Provenance in Deep Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Hongyu and Liang, Sichu and Wang, Wenwen and Zhang, Zhuomeng and Li, Fangqi and Wang, Shi-Lin}, title = {Evading Data Provenance in Deep Neural Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1249-1260} }
Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Weiming and Ma, Wentao and Yang, Huan and Wei, Cong and Zhang, Ge and Chen, Wenhu}, title = {Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21197-21208} }
Manual-PA: Learning 3D Part Assembly from Instruction Diagrams-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jiahao and Cherian, Anoop and Rodriguez, Cristian and Deng, Weijian and Gould, Stephen}, title = {Manual-PA: Learning 3D Part Assembly from Instruction Diagrams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6304-6314} }
RMultiplex200K: Toward Reliable Multimodal Process Supervision for Visual Language Models on Telecommunications-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Sijia and Song, Bin}, title = {RMultiplex200K: Toward Reliable Multimodal Process Supervision for Visual Language Models on Telecommunications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1686-1696} }
Video-T1: Test-time Scaling for Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Fangfu and Wang, Hanyang and Cai, Yimo and Zhang, Kaiyan and Zhan, Xiaohang and Duan, Yueqi}, title = {Video-T1: Test-time Scaling for Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18671-18681} }
MemoryTalker: Personalized Speech-Driven 3D Facial Animation via Audio-Guided Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Hyung Kyu and Lee, Sangmin and Kim, Hak Gu}, title = {MemoryTalker: Personalized Speech-Driven 3D Facial Animation via Audio-Guided Stylization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11241-11251} }
Text2Outfit: Controllable Outfit Generation with Multimodal Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhai_2025_ICCV, author = {Zhai, Yuanhao and Lin, Yen-Liang and Peng, Minxu and Davis, Larry S. and Chandramouli, Ashwin and Yuan, Junsong and Doermann, David}, title = {Text2Outfit: Controllable Outfit Generation with Multimodal Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16165-16174} }
From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Suo_2025_ICCV, author = {Suo, Yucheng and Ma, Fan and Zhu, Linchao and Wang, Tianyi and Rao, Fengyun and Yang, Yi}, title = {From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23243-23255} }
CA2C: A Prior-Knowledge-Free Approach for Robust Label Noise Learning via Asymmetric Co-learning and Co-training-
[pdf]
[bibtex]@InProceedings{Sheng_2025_ICCV, author = {Sheng, Mengmeng and Sun, Zeren and Zhou, Tianfei and Shu, Xiangbo and Pan, Jinshan and Yao, Yazhou}, title = {CA2C: A Prior-Knowledge-Free Approach for Robust Label Noise Learning via Asymmetric Co-learning and Co-training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {901-911} }
HairCUP: Hair Compositional Universal Prior for 3D Gaussian Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Byungjun and Saito, Shunsuke and Nam, Giljoo and Simon, Tomas and Saragih, Jason and Joo, Hanbyul and Li, Junxuan}, title = {HairCUP: Hair Compositional Universal Prior for 3D Gaussian Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9966-9976} }
Prototype-based Contrastive Learning with Stage-wise Progressive Augmentation for Self-Supervised Fine-Grained Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Baofeng and Wei, Xiu-Shen and Zhao, Lin}, title = {Prototype-based Contrastive Learning with Stage-wise Progressive Augmentation for Self-Supervised Fine-Grained Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4125-4134} }
4DSegStreamer: Streaming 4D Panoptic Segmentation via Dual Threads-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ling and Tian, Jun and Yi, Li}, title = {4DSegStreamer: Streaming 4D Panoptic Segmentation via Dual Threads}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7089-7098} }
GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Yusen and Huang, Zhenmin and Wu, Jin and Ma, Jun}, title = {GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26869-26878} }
Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Qiangqiang and Yu, Yi and Kong, Chenqi and Liu, Ziquan and Wan, Jia and Li, Haoliang and Kot, Alex C. and Chan, Antoni B.}, title = {Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11110-11121} }
Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Youming and Xian, Wenqi and Yang, Guandao and Guibas, Leonidas and Wetzstein, Gordon and Marschner, Steve and Debevec, Paul}, title = {Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25124-25133} }
TWIST & SCOUT: Grounding Multimodal LLM-Experts by Forget-Free Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bhowmik_2025_ICCV, author = {Bhowmik, Aritra and Derakhshani, Mohammad Mahdi and Koelma, Dennis and Asano, Yuki M. and Oswald, Martin R. and Snoek, Cees G. M.}, title = {TWIST \& SCOUT: Grounding Multimodal LLM-Experts by Forget-Free Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1359-1368} }
Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoo_2025_ICCV, author = {Yoo, Seungju and Kwon, Hyuk and Hwang, Joong-Won and Lee, Kibok}, title = {Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19764-19773} }
VSP: Diagnosing the Dual Challenges of Perception and Reasoning in Spatial Planning Tasks for MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Qiucheng and Zhao, Handong and Saxon, Michael and Bui, Trung and Wang, William Yang and Zhang, Yang and Chang, Shiyu}, title = {VSP: Diagnosing the Dual Challenges of Perception and Reasoning in Spatial Planning Tasks for MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2270-2280} }
Instance-Level Video Depth in Groups Beyond Occlusions-
[pdf]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Yuan and Zhou, Yang and Sun, Ziming and Xiang, Tianyi and Li, Guiqing and He, Shengfeng}, title = {Instance-Level Video Depth in Groups Beyond Occlusions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7581-7591} }
ODP-Bench: Benchmarking Out-of-Distribution Performance Prediction-
[pdf]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Han and Li, Kehan and Li, Dongbai and He, Yue and Zhang, Xingxuan and Cui, Peng}, title = {ODP-Bench: Benchmarking Out-of-Distribution Performance Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1846-1858} }
Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yong and Wu, Song-Li and Bai, Sule and Wang, Jiahao and Wang, Yitong and Tang, Yansong}, title = {Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22664-22674} }
Sibai: A Few-Shot Meta-Classifier for Poisoning Detection in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gotz_2025_ICCV, author = {G\"otz, Melanie and Krau{\ss}, Torsten and Dmitrienko, Alexandra}, title = {Sibai: A Few-Shot Meta-Classifier for Poisoning Detection in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3787-3797} }
StyleKeeper: Prevent Content Leakage using Negative Visual Query Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Jaeseok and Kim, Junho and Lee, Gayoung and Choi, Yunjey and Uh, Youngjung}, title = {StyleKeeper: Prevent Content Leakage using Negative Visual Query Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15760-15769} }
Generative Modeling of Shape-Dependent Self-Contact Human Poses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ohkawa_2025_ICCV, author = {Ohkawa, Takehiko and Lee, Jihyun and Saito, Shunsuke and Saragih, Jason and Prada, Fabian and Xu, Yichen and Yu, Shoou-I and Furuta, Ryosuke and Sato, Yoichi and Shiratori, Takaaki}, title = {Generative Modeling of Shape-Dependent Self-Contact Human Poses}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5426-5436} }
Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Zheng and Song, Jifei and Zhang, Zhensong and Deng, Jiankang and Patras, Ioannis}, title = {Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19195-19205} }
DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Zhang, Xiaopei and Wang, Xingang and Shen, Fei and Zhang, Zhengtao and Prasad, Mukesh and Ding, Guiguang}, title = {DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20519-20528} }
DAP-MAE: Domain-Adaptive Point Cloud Masked Autoencoder for Effective Cross-Domain Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Ziqi and Li, Qiufu and Shen, Linlin}, title = {DAP-MAE: Domain-Adaptive Point Cloud Masked Autoencoder for Effective Cross-Domain Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3488-3498} }
6DOPE-GS: Online 6D Object Pose Estimation using Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Yufeng and Prasad, Vignesh and Jauhri, Snehal and Franzius, Mathias and Chalvatzaki, Georgia}, title = {6DOPE-GS: Online 6D Object Pose Estimation using Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8032-8043} }
Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2025_ICCV, author = {Wan, Maoxian and Li, Kaige and Geng, Qichuan and Shi, Weimin and Zhou, Zhong}, title = {Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24113-24122} }
Faster and Better 3D Splatting via Group Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chengbo and Ma, Guozheng and Xue, Yifei and Lao, Yizhen}, title = {Faster and Better 3D Splatting via Group Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27968-27977} }
PLMP - Point-Line Minimal Problems for Projective SfM-
[pdf]
[supp]
[bibtex]@InProceedings{Kiehn_2025_ICCV, author = {Kiehn, Kim and Ahlb\"ack, Albin and Kohn, Kathl\'en}, title = {PLMP - Point-Line Minimal Problems for Projective SfM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8558-8567} }
Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Yingfan and An, Bohan and Shen, Ao and Yuan, Mingzhi and Duan, Minghong and Wang, Manning}, title = {Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23561-23570} }
3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Xiaobiao and Wang, Yida and Sun, Haiyang and Wu, Zhuojie and Sheng, Hongwei and Wang, Shuyun and Ying, Jiaying and Lu, Ming and Zhu, Tianqing and Zhan, Kun and Yu, Xin}, title = {3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26488-26498} }
Enhancing Transferability of Targeted Adversarial Examples via Inverse Target Gradient Competition and Spatial Distance Stretching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhankai and Wang, Weiping and Li, Jie and Zhang, Shigeng and Hu, Yunan and Guo, Song}, title = {Enhancing Transferability of Targeted Adversarial Examples via Inverse Target Gradient Competition and Spatial Distance Stretching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3716-3725} }
MotionShot: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yanchen and Sun, Yanan and Xing, Zhening and Gao, Junyao and Chen, Kai and Pei, Wenjie}, title = {MotionShot: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11861-11871} }
STIV: Scalable Text and Image Conditioned Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Zongyu and Liu, Wei and Chen, Chen and Lu, Jiasen and Hu, Wenze and Fu, Tsu-Jui and Allardice, Jesse and Lai, Zhengfeng and Song, Liangchen and Zhang, Bowen and Chen, Cha and Fei, Yiran and Li, Lezhi and Yang, Yinfei and Sun, Yizhou and Chang, Kai-Wei}, title = {STIV: Scalable Text and Image Conditioned Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16249-16259} }
Repurposing 2D Diffusion Models with Gaussian Atlas for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Tiange and Li, Kai and Long, Chengjiang and H\"ane, Christian and Guo, Peihong and Delp, Scott and Adeli, Ehsan and Fei-Fei, Li}, title = {Repurposing 2D Diffusion Models with Gaussian Atlas for 3D Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16492-16502} }
Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dedhia_2025_ICCV, author = {Dedhia, Bhishma and Bourgin, David and Singh, Krishna Kumar and Li, Yuheng and Kang, Yan and Xu, Zhan and Jha, Niraj K. and Liu, Yuchen}, title = {Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15385-15394} }
The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xinyang and Wei, Fanyue and Duan, Lixin and Yao, Angela and Li, Wen}, title = {The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20981-20990} }
Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2025_ICCV, author = {Cong, Dat Nguyen and Bao, Hieu Tran and Hoang-Thanh, Tung}, title = {Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18531-18541} }
A Conditional Probability Framework for Compositional Zero-shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Peng and Lai, Qiuxia and Fang, Hao and Xie, Guo-Sen and Yin, Yilong and Lu, Xiankai and Wang, Wenguan}, title = {A Conditional Probability Framework for Compositional Zero-shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3673-3683} }
IAP: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutta_2025_ICCV, author = {Dutta, Subrat Kishore and Zhang, Xiao}, title = {IAP: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14766-14775} }
Advancing Textual Prompt Learning with Anchored Attributes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zheng and Song, Yibing and Cheng, Ming-Ming and Li, Xiang and Yang, Jian}, title = {Advancing Textual Prompt Learning with Anchored Attributes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3618-3627} }
Event-based Visual Vibrometry-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xinyu and Duan, Peiqi and Xiaokaiti, Yeliduosi and Xu, Chao and Shi, Boxin}, title = {Event-based Visual Vibrometry}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24666-24676} }
Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Haochen and Niu, Jianwei and Liu, Xuefeng and Xie, Xiaozheng and Kuang, Li and Yang, Haotian and Dai, Bin and Meng, Hui and Wang, Yong}, title = {Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21832-21842} }
Online Reasoning Video Segmentation with Just-in-Time Digital Twins-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Yiqing and Liu, Bohan and Li, Chenjia and Seenivasan, Lalithkumar and Unberath, Mathias}, title = {Online Reasoning Video Segmentation with Just-in-Time Digital Twins}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24698-24706} }
Robust Multi-View Learning via Representation Fusion of Sample-Level Attention and Alignment of Simulated Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jie and Zhao, Na and Niu, Gang and Sugiyama, Masashi and Zhu, Xiaofeng}, title = {Robust Multi-View Learning via Representation Fusion of Sample-Level Attention and Alignment of Simulated Perturbation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4232-4241} }
InterSyn: Interleaved Learning for Dynamic Motion Synthesis in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Yiyi and Liang, Yuanzhi and Li, Xiu and Zhang, Chi and Li, Xuelong}, title = {InterSyn: Interleaved Learning for Dynamic Motion Synthesis in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12832-12841} }
SegmentDreamer: Towards High-fidelity Text-to-3D Synthesis with Segmented Consistency Trajectory Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Jiahao and Chen, Zixuan and Wang, Guangcong and Xie, Xiaohua and Zhou, Yi}, title = {SegmentDreamer: Towards High-fidelity Text-to-3D Synthesis with Segmented Consistency Trajectory Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15864-15874} }
InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Minghao and Wu, Shengjie and Wang, Kangkan and Liang, Dong}, title = {InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26136-26145} }
Fine-Grained Evaluation of Large Vision-Language Models in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yue and Tian, Meng and Lin, Zhenyu and Zhu, Jiangtong and Zhu, Dechang and Liu, Haiqiang and Zhang, Yueyi and Xiong, Zhiwei and Zhao, Xinhai}, title = {Fine-Grained Evaluation of Large Vision-Language Models in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9431-9442} }
MaTe: Images Are All You Need for Material Transfer via Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Nisha and Liu, Henglin and Lin, Yizhou and Huang, Kaer and Chen, Chubin and Guo, Jie and Lee, Tong-yee and Li, Xiu}, title = {MaTe: Images Are All You Need for Material Transfer via Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15117-15126} }
Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xidan and Zhuang, Yihan and Guo, Qian and Yang, Haodong and Qian, Xuelin and Cheng, Gong and Han, Junwei and Huang, Zhongling}, title = {Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29075-29085} }
Interpretable point cloud classification using multiple instance learning-
[pdf]
[supp]
[bibtex]@InProceedings{De_Vries_2025_ICCV, author = {De Vries, Matt and Naidoo, Reed and Fourkioti, Olga and Dent, Lucas G. and Curry, Nathan and Dunsby, Chris and Bakal, Chris}, title = {Interpretable point cloud classification using multiple instance learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22209-22220} }
Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Lujun and Lin, Cheng and Li, Dezhi and Huang, You-Liang and Li, Wei and Wu, Tianyu and Zou, Jie and Xue, Wei and Han, Sirui and Guo, Yike}, title = {Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22252-22262} }
CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yihang and Wen, Ying and Yang, Longzhen and He, Lianghua and Shen, Heng Tao}, title = {CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23051-23062} }
STAR: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Rui and Liu, Yinhong and Zhou, Penghao and Zhao, Chen and Zhou, Jun and Zhang, Kai and Zhang, Zhenyu and Yang, Jian and Yang, Zhenheng and Tai, Ying}, title = {STAR: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17108-17118} }
SCAN: Bootstrapping Contrastive Pre-training for Data Efficiency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Yangyang and Kankanhalli, Mohan}, title = {SCAN: Bootstrapping Contrastive Pre-training for Data Efficiency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3662-3672} }
Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Qin and Liang, Guoyan and Li, Xindi and Chen, Jingyuan and Wang, Zhe and Yao, Chang and Wu, Sai}, title = {Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22529-22538} }
Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Junhao and Sun, Jiahao and Lin, Chenhao and Zhao, Zhengyu and Ma, Chen and Zhang, Chong and Wang, Cong and Wang, Qian and Shen, Chao}, title = {Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23476-23486} }
V.I.P. : Iterative Online Preference Distillation for Efficient Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jisoo and Seo, Wooseok and Kim, Junwan and Park, Seungho and Park, Sooyeon and Yu, Youngjae}, title = {V.I.P. : Iterative Online Preference Distillation for Efficient Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17235-17245} }
Diffusion-based Source-biased Model for Single Domain Generalized Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Han and Yang, Wenfei and Zhang, Tianzhu and Zhang, Yongdong}, title = {Diffusion-based Source-biased Model for Single Domain Generalized Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1548-1557} }
Domain Generalizable Portrait Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xinbo and Xu, Wenju and Zhang, Qing and Zheng, Wei-Shi}, title = {Domain Generalizable Portrait Style Transfer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15802-15811} }
4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Wenxuan and Li, Bing and Zheng, Cheng and Mai, Jinjie and Chen, Jun and Jiang, Letian and Hamdi, Abdullah and Martinez, Sara Rojas and Lin, Chia-Wen and Elhoseiny, Mohamed and Ghanem, Bernard}, title = {4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21129-21143} }
How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?-
[pdf]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Yujian and Gao, Peng and Xu, Yongqi and Fan, Wentao}, title = {How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23342-23352} }
Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Shihao and Li, Dayu and Pan, Jinshan and Zhou, Juncheng and Shi, Jinglei and Yang, Jufeng}, title = {Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12307-12317} }
Epona: Autoregressive Diffusion World Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Kaiwen and Tang, Zhenyu and Hu, Xiaotao and Pan, Xingang and Guo, Xiaoyang and Liu, Yuan and Huang, Jingwei and Yuan, Li and Zhang, Qian and Long, Xiao-Xiao and Cao, Xun and Yin, Wei}, title = {Epona: Autoregressive Diffusion World Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27220-27230} }
Diffusion-based 3D Hand Motion Recovery with Intuitive Physics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yufei and Cui, Zijun and Kephart, Jeffrey O. and Ji, Qiang}, title = {Diffusion-based 3D Hand Motion Recovery with Intuitive Physics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7306-7317} }
Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ogren_2025_ICCV, author = {Ogren, Alexander C. and Feng, Berthy T. and Ahn, Jihoon and Bouman, Katherine L. and Daraio, Chiara}, title = {Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26446-26455} }
Leveraging Spatial Invariance to Boost Adversarial Transferability-
[pdf]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zihan and Li, Li and Ren, Yanli and Qin, Chuan and Feng, Guorui}, title = {Leveraging Spatial Invariance to Boost Adversarial Transferability}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1423-1432} }
MoSiC: Optimal-Transport Motion Trajectory for Dense Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Salehi_2025_ICCV, author = {Salehi, Mohammadreza and Venkataramanan, Shashanka and Simion, Ioana and Gavves, Efstratios and Snoek, Cees G. M. and Asano, Yuki M}, title = {MoSiC: Optimal-Transport Motion Trajectory for Dense Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6541-6551} }
GDKVM: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Rui and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing}, title = {GDKVM: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12191-12200} }
ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Susladkar_2025_ICCV, author = {Susladkar, Onkar and Deshmukh, Gayatri and Tur, Yalcin and Durak, Gorkem and Bagci, Ulas}, title = {ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22772-22782} }
DanceEditor: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hengyuan and Li, Zhe and Qi, Xingqun and Li, Mengze and Sun, Muyi and Wang, Siye and Zhang, Man and Han, Sirui}, title = {DanceEditor: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12158-12168} }
When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hongliang and Liu, Yongxiang and Mo, Canyu and Li, Weijie and Peng, Bowen and Liu, Li}, title = {When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24309-24318} }
AnimalClue: Recognizing Animals by their Traces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shinoda_2025_ICCV, author = {Shinoda, Risa and Inoue, Nakamasa and Laina, Iro and Rupprecht, Christian and Kataoka, Hirokatsu}, title = {AnimalClue: Recognizing Animals by their Traces}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14776-14786} }
From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Yuhui and Wu, Haoxiang and Nie, Wenjie and Chen, Guangyao and Zheng, Xiawu and Shen, Yunhang and Peng, Jun and Tian, Yonghong and Ji, Rongrong}, title = {From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24380-24391} }
Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models-
[pdf]
[bibtex]@InProceedings{Suo_2025_ICCV, author = {Suo, Wei and Ma, Ji and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning}, title = {Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20247-20256} }
Discontinuity-aware Normal Integration for Generic Central Camera Models-
[pdf]
[supp]
[bibtex]@InProceedings{Milano_2025_ICCV, author = {Milano, Francesco and L\'opez-Antequera, Manuel and Dhingra, Naina and Siegwart, Roland and Thiel, Robert}, title = {Discontinuity-aware Normal Integration for Generic Central Camera Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26026-26034} }
LazyMAR: Accelerating Masked Autoregressive Models via Feature Caching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Feihong and Wei, Qingyan and Tang, Jiayi and Li, Jiajun and Wang, Yulin and Hu, Xuming and Li, Huiqi and Zhang, Linfeng}, title = {LazyMAR: Accelerating Masked Autoregressive Models via Feature Caching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15552-15561} }
OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yuan and Hou, Saihui and Hou, Saijie and Du, Jiabao and Meng, Shibei and Huang, Yongzhen}, title = {OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21440-21449} }
MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Changwon and Kim, Jisong and Shin, Hongjae and Park, Junseo and Choi, Jun Won}, title = {MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28313-28323} }
MM-Spatial: Exploring 3D Spatial Understanding in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Daxberger_2025_ICCV, author = {Daxberger, Erik and Wenzel, Nina and Griffiths, David and Gang, Haiming and Lazarow, Justin and Kohavi, Gefen and Kang, Kai and Eichner, Marcin and Yang, Yinfei and Dehghan, Afshin and Grasch, Peter}, title = {MM-Spatial: Exploring 3D Spatial Understanding in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7395-7408} }
CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Peiqi and Yu, Lei and Wan, Yi and Pei, Yingying and Liu, Xinyi and Yao, Yongxiang and Zhang, Yingying and Ru, Lixiang and Zhong, Liheng and Chen, Jingdong and Yang, Ming and Zhang, Yongjun}, title = {CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28063-28072} }
Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Shengqi and Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Li, Lincheng and Bi, Mengxiao and Yang, Xiaokang and Yan, Yichao}, title = {Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17640-17650} }
CObL: Toward Zero-Shot Ordinal Layering without User Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Damaraju_2025_ICCV, author = {Damaraju, Aneel and Hazineh, Dean and Zickler, Todd}, title = {CObL: Toward Zero-Shot Ordinal Layering without User Prompting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8154-8164} }
Rethinking the Upsampling Process in Light Field Super-Resolution with Spatial-Epipolar Implicit Image Function-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2025_ICCV, author = {Cong, Ruixuan and Wang, Yu and Zhao, Mingyuan and Yang, Da and Chen, Rongshan and Sheng, Hao}, title = {Rethinking the Upsampling Process in Light Field Super-Resolution with Spatial-Epipolar Implicit Image Function}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7559-7569} }
GGTalker: Talking Head Systhesis with Generalizable Gaussian Priors and Identity-Specific Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Wentao and Li, Shunkai and Peng, Ziqiao and Zhang, Haoxian and Shi, Fan and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Tian, Hui}, title = {GGTalker: Talking Head Systhesis with Generalizable Gaussian Priors and Identity-Specific Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10108-10117} }
SAC-GNC: SAmple Consensus for adaptive Graduated Non-Convexity-
[pdf]
[supp]
[bibtex]@InProceedings{Piedade_2025_ICCV, author = {Piedade, Valter and Sidhartha, Chitturi and Gaspar, Jos\'e and Govindu, Venu Madhav and Miraldo, Pedro}, title = {SAC-GNC: SAmple Consensus for adaptive Graduated Non-Convexity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5780-5790} }
VGGSounder: Audio-Visual Evaluations for Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zverev_2025_ICCV, author = {Zverev, Daniil and Wiedemer, Thadd\"aus and Prabhu, Ameya and Bethge, Matthias and Brendel, Wieland and Koepke, A. Sophia}, title = {VGGSounder: Audio-Visual Evaluations for Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1027-1037} }
ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers-
[pdf]
[arXiv]
[bibtex]@InProceedings{DiBrita_2025_ICCV, author = {DiBrita, Nicholas S. and Han, Jason and Patel, Tirthak}, title = {ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20085-20094} }
Make Me Happier: Evoking Emotions Through Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Qing and Zhang, Jingfeng and Ong, Yew-Soon and Zhang, Mengmi}, title = {Make Me Happier: Evoking Emotions Through Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16367-16376} }
SimpleVQA: Multimodal Factuality Evaluation for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Xianfu and Zhang, Wei and Zhang, Shiwei and Yang, Jian and Guan, Xiangyuan and Wu, Xianjie and Li, Xiang and Zhang, Ge and Liu, Jiaheng and Mai, Yuying and Zeng, Yutao and Wen, Zhoufutu and Jin, Ke and Wang, Baorui and Zhou, Weixiao and Lu, Yunhong and Ji, Hangyuan and Li, Tongliang and Huang, Wenhao and Li, Zhoujun}, title = {SimpleVQA: Multimodal Factuality Evaluation for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4637-4646} }
DiTFastAttnV2: Head-wise Attention Compression for Multi-Modality Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hanling and Su, Rundong and Yuan, Zhihang and Chen, Pengtao and Shen, Mingzhu and Fan, Yibo and Yan, Shengen and Dai, Guohao and Wang, Yu}, title = {DiTFastAttnV2: Head-wise Attention Compression for Multi-Modality Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16399-16409} }
Find a Scapegoat: Poisoning Membership Inference Attack and Defense to Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2025_ICCV, author = {Mo, Wenjin and Li, Zhiyuan and Fang, Minghong and Fang, Mingwei}, title = {Find a Scapegoat: Poisoning Membership Inference Attack and Defense to Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3967-3976} }
Generalized Tensor-based Parameter-Efficient Fine-Tuning via Lie Group Transformations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Si_2025_ICCV, author = {Si, Chongjie and Shi, Zhiyi and Wang, Xuehui and Xiao, Yichen and Yang, Xiaokang and Shen, Wei}, title = {Generalized Tensor-based Parameter-Efficient Fine-Tuning via Lie Group Transformations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {197-207} }
GAS: Generative Avatar Synthesis from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yixing and Dong, Junting and Kwon, Youngjoong and Zhao, Qin and Dai, Bo and De la Torre, Fernando}, title = {GAS: Generative Avatar Synthesis from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12883-12893} }
GenM3: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Junyu and Liu, Lijiang and Sun, Yong and Zhang, Zhiyuan and Zhou, Jinni and Nie, Qiang}, title = {GenM3: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13129-13139} }
Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Tianao and Cui, Manxiu and Ma, Cheng and Alexander, Emma}, title = {Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27466-27475} }
Granular Concept Circuits: Toward a Fine-Grained Circuit Discovery for Concept Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_ICCV, author = {Kwon, Dahee and Lee, Sehyun and Choi, Jaesik}, title = {Granular Concept Circuits: Toward a Fine-Grained Circuit Discovery for Concept Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2356-2365} }
Diversity-Enhanced Distribution Alignment for Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Hongcheng and Zhou, Yucan and Gu, Xiaoyan and Li, Bo and Wang, Weiping}, title = {Diversity-Enhanced Distribution Alignment for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3747-3756} }
OCR Hinders RAG: Evaluating the Cascading Impact of OCR on Retrieval-Augmented Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Junyuan and Zhang, Qintong and Wang, Bin and Ouyang, Linke and Wen, Zichen and Li, Ying and Chow, Ka-Ho and He, Conghui and Zhang, Wentao}, title = {OCR Hinders RAG: Evaluating the Cascading Impact of OCR on Retrieval-Augmented Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17443-17453} }
HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yi and Zhang, Ke and Liu, Wei and Wang, Yuanyuan and Patel, Vishal M. and Lu, Le and Han, Xu and Jin, Dakai and Yan, Ke}, title = {HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23571-23581} }
Semi-supervised Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Lijie and Huang, Tianhao and Xie, Huanyi and Gong, Xilin and Ren, Chenyang and Hu, Zhengyu and Yu, Lu and Ma, Ping and Wang, Di}, title = {Semi-supervised Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2110-2119} }
Can Knowledge be Transferred from Unimodal to Multimodal? Investigating the Transitivity of Multimodal Knowledge Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Lingyong and Wang, Xinzhong and Wang, Depeng and Wu, Zongru and Guo, Ya and Zhu, Huijia and Zhang, Zhuosheng and Liu, Gongshen}, title = {Can Knowledge be Transferred from Unimodal to Multimodal? Investigating the Transitivity of Multimodal Knowledge Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2482-2490} }
Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Ding, Zenghui and Yang, Xianjun and Sun, Yining}, title = {Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22046-22055} }
CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds-
[pdf]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Feng and Cao, Yichao and Su, Xiu and Niu, Dan and Li, Xuanpeng}, title = {CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24760-24769} }
MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rong_2025_ICCV, author = {Rong, Fu and Lan, Meng and Zhang, Qian and Zhang, Lefei}, title = {MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23979-23989} }
FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jiale and Gao, Shenghua and Shan, Ying}, title = {FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25442-25452} }
FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Haisheng and Zhang, Junjie and Song, Feixiang and Zhou, Sanping and Wu, Wei and Yan, Junchi and Zheng, Nanning}, title = {FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28145-28155} }
Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhe and Zhang, Lei and Fu, Zheren and Zhang, Kun and Mao, Zhendong}, title = {Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24319-24329} }
WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Yuci and Lyu, Xinheng and Chen, Wenting and Ding, Meidan and Zhang, Jipeng and He, Xiangjian and Wu, Song and Xing, Xiaohan and Yang, Sen and Wang, Xiyue and Shen, Linlin}, title = {WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22718-22727} }
GLEAM: Learning Generalizable Exploration Policy for Active Mapping in Complex 3D Indoor Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xiao and Wang, Tai and Li, Quanyi and Huang, Tao and Pang, Jiangmiao and Xue, Tianfan}, title = {GLEAM: Learning Generalizable Exploration Policy for Active Mapping in Complex 3D Indoor Scene}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5558-5568} }
DialNav: Multi-turn Dialog Navigation with a Remote Guide-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Leekyeung and Min, Hyunji and Hwangbo, Gyeom and Choi, Jonghyun and Seo, Paul Hongsuck}, title = {DialNav: Multi-turn Dialog Navigation with a Remote Guide}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8514-8523} }
VLDrive: Vision-Augmented Lightweight MLLMs for Efficient Language-grounded Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ruifei and Zhang, Wei and Tan, Xiao and Yang, Sibei and Wan, Xiang and Luo, Xiaonan and Li, Guanbin}, title = {VLDrive: Vision-Augmented Lightweight MLLMs for Efficient Language-grounded Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5923-5933} }
Ponimator: Unfolding Interactive Pose for Versatile Human-human Interaction Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Shaowei and Guo, Chuan and Zhou, Bing and Wang, Jian}, title = {Ponimator: Unfolding Interactive Pose for Versatile Human-human Interaction Animation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12068-12077} }
Streamlining Image Editing with Layered Diffusion Brushes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gholami_2025_ICCV, author = {Gholami, Peyman and Xiao, Robert}, title = {Streamlining Image Editing with Layered Diffusion Brushes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17368-17378} }
ObjectMate: A Recurrence Prior for Object Insertion and Subject-Driven Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Winter_2025_ICCV, author = {Winter, Daniel and Shul, Asaf and Cohen, Matan and Berman, Dana and Pritch, Yael and Rav-Acha, Alex and Hoshen, Yedid}, title = {ObjectMate: A Recurrence Prior for Object Insertion and Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16281-16291} }
CycleVAR: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yi and Li, Shengqian and Lin, Zuzeng and Wang, Feng and Liu, Si}, title = {CycleVAR: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15194-15204} }
Synthetic Video Enhances Physical Fidelity in Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Qi and Ni, Xingyu and Wang, Ziyu and Cheng, Feng and Yang, Ziyan and Jiang, Lu and Wang, Bohan}, title = {Synthetic Video Enhances Physical Fidelity in Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12135-12146} }
Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jun and Wang, Jinpeng and Tan, Chaolei and Lian, Niu and Chen, Long and Wang, Yaowei and Zhang, Min and Xia, Shu-Tao and Chen, Bin}, title = {Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23074-23084} }
Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jianhui and Cheng, Shen and Sun, Qirui and Liu, Jia and Luyang, Wang and Feng, Chaoyu and Fang, Chen and Lei, Lei and Wang, Jue and Liu, Shuaicheng}, title = {Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16991-17000} }
OURO: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding-
[pdf]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Tianrun and Chen, Guanyu and Li, Ye and Xi, Yuxin and Mu, Zeyu and Wang, Ruichen and Zhang, Tianren and Gao, Haichuan and Chen, Feng}, title = {OURO: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18240-18251} }
DiGA3D: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile 3D Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Jingyi and Xu, Dan and Luo, Qiong}, title = {DiGA3D: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile 3D Inpainting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16345-16355} }
Text-to-Any-Skeleton Motion Generation Without Retargeting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Qingyuan and Lv, Ke and Dong, Kun and Xue, Jian and Niu, Zehai and Wang, Jinbao}, title = {Text-to-Any-Skeleton Motion Generation Without Retargeting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12926-12936} }
EVDM: Event-based Real-world Video Deblurring with Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Zhijing and Xu, Senyan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun}, title = {EVDM: Event-based Real-world Video Deblurring with Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13793-13803} }
Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Ruitao and Zhao, Yifan and Li, Jia}, title = {Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21623-21634} }
Towards Fine-grained Interactive Segmentation in Images and Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Yuan and Yang, Qiushi and Cui, Miaomiao and Bo, Liefeng}, title = {Towards Fine-grained Interactive Segmentation in Images and Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22509-22518} }
FaceXFormer: A Unified Transformer for Facial Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Narayan_2025_ICCV, author = {Narayan, Kartik and VS, Vibashan and Chellappa, Rama and Patel, Vishal M.}, title = {FaceXFormer: A Unified Transformer for Facial Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11369-11382} }
Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hermann_2025_ICCV, author = {Hermann, Nicolai and Condor, Jorge and Didyk, Piotr}, title = {Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28881-28891} }
InstantEdit: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_ICCV, author = {Gong, Yiming and Zhu, Zhen and Zhang, Minjia}, title = {InstantEdit: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16808-16817} }
The Source Image is the Best Attention for Infrared and Visible Image Fusion-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Song and Han, Xie and Kuang, Liqun and Wang, Boying and Chen, Zhongyu and Qiao, Zherui and Yang, Fan and Liu, Xiaoxia and Zhang, Bingyu and Wang, Zhixun}, title = {The Source Image is the Best Attention for Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13513-13522} }
Princeton365: A Diverse Dataset with Accurate Camera Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kayan_2025_ICCV, author = {Kayan, Karhan and Alexandropoulos, Stamatis and Jain, Rishabh and Zuo, Yiming and Liang, Erich and Deng, Jia}, title = {Princeton365: A Diverse Dataset with Accurate Camera Pose}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7645-7654} }
Towards Immersive Human-X Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Kaiyang and Shi, Ye and Jin, Zichen and Chen, Kangyi and Xu, Lan and Ma, Yuexin and Yu, Jingyi and Wang, Jingya}, title = {Towards Immersive Human-X Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10173-10183} }
Generative Video Bi-flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Chen and Ritschel, Tobias}, title = {Generative Video Bi-flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19363-19372} }
AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Liuyue and Guo, Jiancong and Cakmakci, Ozan and Araujo, Andre and Jeni, L\'aszl\'o A. and Jia, Zhiheng}, title = {AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26901-26911} }
EFTViT: Efficient Federated Training of Vision Transformers with Masked Images on Resource-Constrained Clients-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Meihan and Chang, Tao and Miao, Cui and Zhou, Jie and Li, Chun and Xu, Xiangyu and Li, Ming and Wang, Xiaodong}, title = {EFTViT: Efficient Federated Training of Vision Transformers with Masked Images on Resource-Constrained Clients}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1815-1824} }
A Structure-aware and Motion-adaptive Framework for 3D Human Pose Estimation with Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Ye and Wang, Jie and Gao, Jianjun and Gong, Rui and Cai, Chen and Yap, Kim-Hui}, title = {A Structure-aware and Motion-adaptive Framework for 3D Human Pose Estimation with Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7958-7968} }
Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hongqiu and Chen, Wu and Luo, Xiangde and Xing, Zhaohu and Liu, Lihao and Qin, Jing and Wu, Shaozhi and Zhu, Lei}, title = {Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24102-24112} }
Scaling Transformer-Based Novel View Synthesis Models with Token Disentanglement and Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Nair_2025_ICCV, author = {Nair, Nithin Gopalakrishnan and Kaza, Srinivas and Luo, Xuan and Patel, Vishal M. and Lombardi, Stephen and Park, Jungyeon}, title = {Scaling Transformer-Based Novel View Synthesis Models with Token Disentanglement and Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28567-28576} }
FED-PsyAU: Privacy-Preserving Micro-Expression Recognition via Psychological AU Coordination and Dynamic Facial Motion Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jingting and Qian, Yu and Zhao, Lin and Wang, Su-Jing}, title = {FED-PsyAU: Privacy-Preserving Micro-Expression Recognition via Psychological AU Coordination and Dynamic Facial Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14453-14463} }
SAFER: Sharpness Aware layer-selective Finetuning for Enhanced Robustness in vision transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gopal_2025_ICCV, author = {Gopal, Bhavna and Yang, Huanrui and Horton, Mark and Chen, Yiran}, title = {SAFER: Sharpness Aware layer-selective Finetuning for Enhanced Robustness in vision transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3999-4008} }
ClearSight: Human Vision-Inspired Solutions for Event-Based Motion Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Xiaopeng and Huang, Yulong and Ren, Hongwei and Liu, Zunchang and Huang, Hongxiang and Zhou, Yue and Fu, Haotian and Cheng, Bojun}, title = {ClearSight: Human Vision-Inspired Solutions for Event-Based Motion Deblurring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7462-7471} }
Event-aided Dense and Continuous Point Tracking: Everywhere and Anytime-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2025_ICCV, author = {Wan, Zhexiong and Luo, Jianqin and Dai, Yuchao and Lee, Gim Hee}, title = {Event-aided Dense and Continuous Point Tracking: Everywhere and Anytime}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7936-7946} }
Stable-Sim2Real: Exploring Simulation of Real-Captured 3D Data with Two-Stage Depth Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Mutian and Ye, Chongjie and Liu, Haolin and Wu, Yushuang and Chang, Jiahao and Han, Xiaoguang}, title = {Stable-Sim2Real: Exploring Simulation of Real-Captured 3D Data with Two-Stage Depth Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2609-2619} }
Towards Real Unsupervised Anomaly Detection Via Confident Meta-Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Aqeel_2025_ICCV, author = {Aqeel, Muhammad and Sharifi, Shakiba and Cristani, Marco and Setti, Francesco}, title = {Towards Real Unsupervised Anomaly Detection Via Confident Meta-Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4858-4867} }
Multi-Schema Proximity Network for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Jiangming and Yin, Xiangbo and Chen, Yeyun and Zhang, Yachao and Zhang, Zhizhong and Xie, Yuan and Qu, Yanyun}, title = {Multi-Schema Proximity Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19999-20008} }
Unified Video Generation via Next-Set Prediction in Continuous Domain-
[pdf]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Zhanzhou and Guo, Qingpei and Xiao, Xinyu and Xu, Ruihan and Yang, Ming and Zhang, Shiliang}, title = {Unified Video Generation via Next-Set Prediction in Continuous Domain}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19427-19438} }
Latent-Reframe: Enabling Camera Control for Video Diffusion Models without Training-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zhenghong and An, Jie and Luo, Jiebo}, title = {Latent-Reframe: Enabling Camera Control for Video Diffusion Models without Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12779-12789} }
EditCLIP: Representation Learning for Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Qian and Cveji\'c, Aleksandar and Eldesokey, Abdelrahman and Wonka, Peter}, title = {EditCLIP: Representation Learning for Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15960-15970} }
Efficient Event Camera Data Pretraining with Adaptive Prompt Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Quanmin and Li, Qiang and Liu, Shuai and Cao, Xinzi and Lu, Jinyi and Yang, Feidiao and Zhang, Wei and Huang, Kai and Tian, Yonghong}, title = {Efficient Event Camera Data Pretraining with Adaptive Prompt Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8656-8667} }
Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Buglakova_2025_ICCV, author = {Buglakova, Elena and Archit, Anwai and D'Imprima, Edoardo and Mahamid, Julia and Pape, Constantin and Kreshuk, Anna}, title = {Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13109-13118} }
Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yiyang and Zhao, Shanshan and Duan, Lunhao and Ding, Changxing and Tao, Dacheng}, title = {Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26156-26166} }
Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jiaxuan and Qi, Yu and Wang, Yueming and Pan, Gang}, title = {Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21938-21948} }
Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2025_ICCV, author = {Chae, Yujeong and Park, Heejun and Kim, Hyeonseong and Yoon, Kuk-Jin}, title = {Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27197-27208} }
RoboAnnotatorX: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration-
[pdf]
[supp]
[bibtex]@InProceedings{Kou_2025_ICCV, author = {Kou, Longxin and Ni, Fei and Zheng, Yan and Han, Peilong and Liu, Jinyi and Cui, Haiqin and Liu, Rui and Hao, Jianye}, title = {RoboAnnotatorX: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10353-10363} }
On-Device Diffusion Transformer Policy for Efficient Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yiming and Wang, Huan and Chen, Zhenghao and Pang, Jianxin and Xu, Dong}, title = {On-Device Diffusion Transformer Policy for Efficient Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14073-14083} }
Unsupervised Part Discovery via Descriptor-Based Masked Image Restoration with Optimized Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Jiahao and Wu, Yike and Huang, Wenjian and Zhang, Jianguo and Zhang, Jian}, title = {Unsupervised Part Discovery via Descriptor-Based Masked Image Restoration with Optimized Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8668-8677} }
VideoOrion: Tokenizing Object Dynamics in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Yicheng and Li, Yijiang and Zhang, Wanpeng and Zheng, Sipeng and Luo, Hao and Yue, Zihao and Lu, Zongqing}, title = {VideoOrion: Tokenizing Object Dynamics in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20401-20412} }
Training-Free Text-Guided Image Editing with Visual Autoregressive Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yufei and Guo, Lanqing and Li, Zhihao and Huang, Jiaxing and Wang, Pichao and Wen, Bihan and Wang, Jian}, title = {Training-Free Text-Guided Image Editing with Visual Autoregressive Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17577-17586} }
Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xiwen and Qiu, Peijie and Zhu, Wenhui and Wang, Hao and Li, Huayu and Dong, Xuanzhao and Sun, Xiaotong and Yu, Xiaobing and Wang, Yalin and Razi, Abolfazl and Sotiras, Aristeidis}, title = {Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21353-21363} }
SPD: Shallow Backdoor Protecting Deep Backdoor Against Backdoor Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Shunjie and Li, Xinghua and Cao, Xuelin and Zhang, Haiyan and Zhu, Mengyao and Deng, Robert H.}, title = {SPD: Shallow Backdoor Protecting Deep Backdoor Against Backdoor Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4029-4038} }
RealGeneral: Unifying Visual Generation via Temporal In-Context Learning with Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Yijing and Huang, Mengqi and Zhuang, Shuhan and Mao, Zhendong}, title = {RealGeneral: Unifying Visual Generation via Temporal In-Context Learning with Video Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14994-15004} }
Mamba-3VL: Taming State Space Model for 3D Vision Language Learning-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuan and Chen, Yuxin and Qi, Zhongang and Liu, Lijun and Jiao, Jile and Feng, Xuetao and Liang, Yujia and Shan, Ying and Zhang, Zhipeng}, title = {Mamba-3VL: Taming State Space Model for 3D Vision Language Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6273-6283} }
Fine-structure Preserved Real-world Image Super-resolution via Transfer VAE Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2025_ICCV, author = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Wu, Yuhui and Zhang, Lei}, title = {Fine-structure Preserved Real-world Image Super-resolution via Transfer VAE Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12415-12426} }
FlowR: Flowing from Sparse to Dense 3D Reconstructions-
[pdf]
[supp]
[bibtex]@InProceedings{Fischer_2025_ICCV, author = {Fischer, Tobias and Bul\`o, Samuel Rota and Yang, Yung-Hsu and Keetha, Nikhil and Porzi, Lorenzo and M\"uller, Norman and Schwarz, Katja and Luiten, Jonathon and Pollefeys, Marc and Kontschieder, Peter}, title = {FlowR: Flowing from Sparse to Dense 3D Reconstructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27702-27712} }
Augmented Mass-Spring Model for Real-Time Dense Hair Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{H._2025_ICCV, author = {Amador H., J. Alejandro and Zhou, Yi and Sun, Xin and Shu, Zhixin and He, Chengan and Pirk, Soren and Michels, Dominik L.}, title = {Augmented Mass-Spring Model for Real-Time Dense Hair Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11339-11347} }
IQA-Adapter: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Abud_2025_ICCV, author = {Abud, Khaled and Lavrushkin, Sergey and Kirillov, Alexey and Vatolin, Dmitriy}, title = {IQA-Adapter: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15469-15480} }
SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Jiayuan and Wu, Junde and Ouyang, Cheng and Kamnitsas, Konstantinos and Noble, J. Alison}, title = {SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23731-23740} }
GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Weihao and Guo, Xiaoqing and Liu, Xinyu and Liu, Yifan and Zheng, Hao and Huang, Yawen and Yuan, Yixuan}, title = {GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21482-21491} }
Robustifying Zero-Shot Vision Language Models by Subspaces Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Junhao and Koniusz, Piotr and Feng, Liaoyuan and Zhang, Yifei and Zhu, Hao and Liu, Weiming and Qu, Xinghua and Ong, Yew-Soon}, title = {Robustifying Zero-Shot Vision Language Models by Subspaces Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21037-21047} }
Performing Defocus Deblurring by Modeling its Formation Process-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zhengbo and Foo, Lin Geng and Rahmani, Hossein and Liu, Jun and Soh, De Wen}, title = {Performing Defocus Deblurring by Modeling its Formation Process}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5791-5801} }
Towards Effective Foundation Model Adaptation for Extreme Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Fei and Wang, Peng and Zhang, Lei and Wei, Wei and Ding, Chen and Lin, Guosheng and Zhang, Yanning}, title = {Towards Effective Foundation Model Adaptation for Extreme Cross-Domain Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4582-4593} }
GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_ICCV, author = {Bai, Haiyang and Zhu, Jiaqi and Jiang, Songru and Huang, Wei and Lu, Tao and Li, Yuanqi and Guo, Jie and Fu, Runze and Guo, Yanwen and Chen, Lijun}, title = {GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26456-26465} }
LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Juelin and Peng, Shuaibang and Wang, Long and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26610-26621} }
RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao}, title = {RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17203-17213} }
Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Lijie and Ma, Tianxiang and Li, Bingchuan and Chen, Zhuowei and Liu, Jiawei and Li, Gen and Zhou, Siyu and He, Qian and Wu, Xinglong}, title = {Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14951-14961} }
EYE3: Turn Anything into Naked-eye 3D-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Yingde and Yang, Zongyuan and Liu, Baolin and Xiong, Yongping and Chen, Sai and Yi, Lan and Zhang, Zhaohe and Yu, Xunbo}, title = {EYE3: Turn Anything into Naked-eye 3D}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27862-27871} }
Boosting Class Representation via Semantically Related Instances for Robust Long-Tailed Learning with Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuhang and Li, Zhuying and Jia, Yuheng}, title = {Boosting Class Representation via Semantically Related Instances for Robust Long-Tailed Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1516-1525} }
You Are Your Own Best Teacher: Achieving Centralized-level Performance in Federated Learning under Heterogeneous and Long-tailed Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Shanshan and Li, Zexi and Wu, Chao and Pang, Meng and Lu, Yang and Yan, Yan and Wang, Hanzi}, title = {You Are Your Own Best Teacher: Achieving Centralized-level Performance in Federated Learning under Heterogeneous and Long-tailed Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2750-2759} }
IFAdapter: Instance Feature Control for Grounded Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yinwei and Zhou, Xianpan and Ma, Bing and Su, Xuefeng and Ma, Kai and Wang, Xinchao}, title = {IFAdapter: Instance Feature Control for Grounded Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15949-15959} }
StableDepth: Scene-Consistent and Scale-Invariant Monocular Depth-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zheng and Yang, Lihe and Yang, Tianyu and Yu, Chaohui and Guo, Xiaoyang and Lao, Yixing and Zhao, Hengshuang}, title = {StableDepth: Scene-Consistent and Scale-Invariant Monocular Depth}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7069-7078} }
LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders-
[pdf]
[supp]
[bibtex]@InProceedings{Naiman_2025_ICCV, author = {Naiman, Ilan and Ben-Baruch, Emanuel and Anschel, Oron and Shoshan, Alon and Kviatkovsky, Igor and Aggarwal, Manoj and Medioni, Gerard}, title = {LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21398-21407} }
Personalized Federated Learning under Local Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Qiqi and Li, Jiaqiang and Liu, Yuchen and Jin, Yaochu and Lyu, Lingjuan and Wu, Xiaohu and Yu, Han}, title = {Personalized Federated Learning under Local Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4069-4079} }
STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Guilian and Wu, Huisi and Qin, Jing}, title = {STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21364-21373} }
DIMO: Diverse 3D Motion Generation for Arbitrary Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Mou_2025_ICCV, author = {Mou, Linzhan and Lei, Jiahui and Wang, Chen and Liu, Lingjie and Daniilidis, Kostas}, title = {DIMO: Diverse 3D Motion Generation for Arbitrary Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14357-14368} }
Enhancing Spatial Reasoning in Multimodal Large Language Models through Reasoning-based Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ning_2025_ICCV, author = {Ning, Zhenhua and Tian, Zhuotao and Shi, Shaoshuai and Lu, Guangming and He, Daojing and Pei, Wenjie and Jiang, Li}, title = {Enhancing Spatial Reasoning in Multimodal Large Language Models through Reasoning-based Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7851-7860} }
MMAD: Multi-label Micro-Action Detection in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Kun and Liu, Pengyu and Guo, Dan and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Wang, Meng}, title = {MMAD: Multi-label Micro-Action Detection in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13225-13236} }
Human-in-the-Loop Local Corrections of 3D Scene Layouts via Infilling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Christopher and Avetisyan, Armen and Howard-Jenkins, Henry and Siddiqui, Yawar and Straub, Julian and Newcombe, Richard and Balntas, Vasileios and Engel, Jakob}, title = {Human-in-the-Loop Local Corrections of 3D Scene Layouts via Infilling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5657-5666} }
2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jeongyun and Jeong, Seunghoon and Kim, Giseop and Jeon, Myung-Hwan and Jun, Eunji and Kim, Ayoung}, title = {2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27927-27936} }
HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yiwen and Nguyen, Hieu T. and Voleti, Vikram and Jampani, Varun and Jiang, Huaizu}, title = {HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28440-28450} }
Fast Globally Optimal and Geometrically Consistent 3D Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roetzer_2025_ICCV, author = {Roetzer, Paul and Bernard, Florian}, title = {Fast Globally Optimal and Geometrically Consistent 3D Shape Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {912-922} }
Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources-
[pdf]
[supp]
[bibtex]@InProceedings{Desai_2025_ICCV, author = {Desai, Alakh and Vasconcelos, Nuno}, title = {Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16122-16131} }
AlignGuard: Scalable Safety Alignment for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Runtao and Chen, I Chieh and Gu, Jindong and Zhang, Jipeng and Pi, Renjie and Chen, Qifeng and Torr, Philip and Khakzar, Ashkan and Pizzati, Fabio}, title = {AlignGuard: Scalable Safety Alignment for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17024-17034} }
Function-centric Bayesian Network for Zero-Shot Object Goal Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Yiyao and Jiang, Shuqiang}, title = {Function-centric Bayesian Network for Zero-Shot Object Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19535-19545} }
Preserve Anything: Controllable Image Synthesis with Object Preservation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2025_ICCV, author = {Sharma, Prasen Kumar and Matiyali, Neeraj and Srivastava, Siddharth and Sharma, Gaurav}, title = {Preserve Anything: Controllable Image Synthesis with Object Preservation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18058-18067} }
Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Tianli and Zhao, Chenyang and Li, Lei and Cao, Heling}, title = {Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27262-27271} }
Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shi-Chen and Li, Yunheng and Wu, Yu-Huan and Hou, Qibin and Cheng, Ming-Ming}, title = {Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22361-22371} }
GIViC: Generative Implicit Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Ge and Teng, Siyue and Peng, Tianhao and Zhang, Fan and Bull, David}, title = {GIViC: Generative Implicit Video Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17356-17367} }
TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Chang and Zhu, Mingxu and Zhang, Zheyuan and Song, Linna and Zhao, Xiao and Luo, Qingliang and Wang, Qi and Guo, Chufan and Su, Kuifeng}, title = {TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26600-26609} }
Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Jian-Jian and Wu, Xiao-Ming and He, Yi-Xiang and Zeng, Ling-An and Wei, Yi-Lin and Zhang, Dandan and Zheng, Wei-Shi}, title = {Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12427-12437} }
LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Boyu and Yue, Zhengrong and Chen, Siran and Wang, Zikang and Liu, Yang and Li, Peng and Wang, Yali}, title = {LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20237-20246} }
Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kirschstein_2025_ICCV, author = {Kirschstein, Tobias and Romero, Javier and Sevastopolsky, Artem and Nie{\ss}ner, Matthias and Saito, Shunsuke}, title = {Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12089-12100} }
EVER: Exact Volumetric Ellipsoid Rendering for Real-time View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2025_ICCV, author = {Mai, Alexander and Hedman, Peter and Kopanas, George and Verbin, Dor and Futschik, David and Xu, Qiangeng and Kuester, Falko and Barron, Jonathan T. and Zhang, Yinda}, title = {EVER: Exact Volumetric Ellipsoid Rendering for Real-time View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4930-4939} }
Democratizing High-Fidelity Co-Speech Gesture Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Xu and Huang, Shaoli and Xie, Shenbo and Chen, Xuelin and Liu, Yifei and Ding, Changxing}, title = {Democratizing High-Fidelity Co-Speech Gesture Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14283-14292} }
Dissecting Generalized Category Discovery: Multiplex Consensus under Self-Deconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Luyao and Huang, Kunze and Chen, Chaoqi and Yuan, Yuxuan and Li, Chenxin and Tu, Xiaotong and Ding, Xinghao and Huang, Yue}, title = {Dissecting Generalized Category Discovery: Multiplex Consensus under Self-Deconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {297-307} }
DiffIP: Representation Fingerprints for Robust IP Protection of Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhuoling and Qu, Haoxuan and Kuen, Jason and Gu, Jiuxiang and Ke, Qiuhong and Liu, Jun and Rahmani, Hossein}, title = {DiffIP: Representation Fingerprints for Robust IP Protection of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17035-17045} }
Divide-and-Conquer for Enhancing Unlabeled Learning, Stability, and Plasticity in Semi-supervised Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Yue and Chen, Taicai and Qi, Lei and Shi, Yinghuan}, title = {Divide-and-Conquer for Enhancing Unlabeled Learning, Stability, and Plasticity in Semi-supervised Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {583-593} }
All Parts Matter: A Unified Mask-Free Virtual Try-On Framework-
[pdf]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Chenghu and Xiong, Shengwu and Rong, Yi}, title = {All Parts Matter: A Unified Mask-Free Virtual Try-On Framework}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19525-19534} }
CogNav: Cognitive Process Modeling for Object Goal Navigation with LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Yihan and Zhang, Jiazhao and Yu, Zhinan and Liu, Shuzhen and Qin, Zheng and Zou, Qin and Du, Bo and Xu, Kai}, title = {CogNav: Cognitive Process Modeling for Object Goal Navigation with LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9550-9560} }
Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image-
[pdf]
[supp]
[bibtex]@InProceedings{Ishihara_2025_ICCV, author = {Ishihara, Shin and Sato, Imari}, title = {Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26827-26836} }
Explaining Human Preferences via Metrics for Structured 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Langerman_2025_ICCV, author = {Langerman, Jack and Rozumnyi, Denys and Huang, Yuzhong and Mishkin, Dmytro}, title = {Explaining Human Preferences via Metrics for Structured 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26944-26953} }
LOCATEdit: Graph Laplacian Optimized Cross Attention for Localized Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soni_2025_ICCV, author = {Soni, Achint and Soni, Meet and Rambhatla, Sirisha}, title = {LOCATEdit: Graph Laplacian Optimized Cross Attention for Localized Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18804-18814} }
VistaDream: Sampling multiview consistent images for single-view scene reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haiping and Liu, Yuan and Liu, Ziwei and Wang, Wenping and Dong, Zhen and Yang, Bisheng}, title = {VistaDream: Sampling multiview consistent images for single-view scene reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26772-26782} }
LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Fangfu and Li, Hao and Chi, Jiawei and Wang, Hanyang and Yang, Minghui and Wang, Fudong and Duan, Yueqi}, title = {LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29010-29020} }
Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnan_2025_ICCV, author = {Krishnan, Akshay and Yan, Xinchen and Casser, Vincent and Kundu, Abhijit}, title = {Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28217-28227} }
MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Jianfei and Liu, Qiankun and Yu, Haochen and Liu, Hongyuan and Wang, Liyong and Chen, Jiansheng and Ma, Huimin}, title = {MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27806-27816} }
Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zitong and Gautam, Suranjan and Yu, Rui}, title = {Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28493-28502} }
Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zeyu and Zhang, Jizheng and Song, Haiyu and Ge, Mingyu and Wang, Jiayu and Duan, Haoran}, title = {Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12637-12647} }
Unbiased Missing-modality Multimodal Learning-
[pdf]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Ruiting and Li, Chenxi and Yan, Yandong and Mo, Lisi and Qin, Ke and He, Tao}, title = {Unbiased Missing-modality Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24507-24517} }
You Think, You ACT: The New Task of Arbitrary Text to Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Runqi and Ma, Caoyuan and Li, Guopeng and Xu, Hanrui and Li, Yuke and Wang, Zheng}, title = {You Think, You ACT: The New Task of Arbitrary Text to Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12012-12022} }
Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yicong and Chen, Yiyang and Ma, Zhenyuan and Xiao, Junbin and Wang, Xiang and Yao, Angela}, title = {Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22836-22845} }
FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Renshan and Shao, Rui and Chen, Gongwei and Zhang, Miao and Zhou, Kaiwen and Guan, Weili and Nie, Liqiang}, title = {FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23530-23540} }
Verbalized Representation Learning for Interpretable Few-Shot Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Cheng-Fu and Yin, Da and Hu, Wenbo and Ji, Heng and Peng, Nanyun and Zhou, Bolei and Chang, Kai-Wei}, title = {Verbalized Representation Learning for Interpretable Few-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1602-1612} }
HERO: Human Reaction Generation from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Chengjun and Zhai, Wei and Yang, Yuhang and Cao, Yang and Zha, Zheng-Jun}, title = {HERO: Human Reaction Generation from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10262-10274} }
Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Zhenxing and Chen, Jiazhou}, title = {Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25145-25154} }
Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thaker_2025_ICCV, author = {Thaker, Darshan and Goyal, Abhishek and Vidal, Rene}, title = {Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12873-12882} }
Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Shuyu and Wang, Yaxiong and Zhu, Li and Zheng, Zhedong}, title = {Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11720-11730} }
GigaTok: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2025_ICCV, author = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Feng, Jiashi and Liu, Xihui}, title = {GigaTok: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18770-18780} }
Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Yuxin and Huang, Binxiao and Wu, Taiqiang and Zhou, Wenyong and Ding, Chenchen and Liu, Zhengwu and Chesi, Graziano and Wong, Ngai}, title = {Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28503-28513} }
DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Rui and Zhang, Xianghang and Zhao, Runkai and Yan, Huaicheng and Wang, Meng}, title = {DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26188-26197} }
TESPEC: Temporally-Enhanced Self-Supervised Pretraining for Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mohammadi_2025_ICCV, author = {Mohammadi, Mohammad and Wu, Ziyi and Gilitschenski, Igor}, title = {TESPEC: Temporally-Enhanced Self-Supervised Pretraining for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7782-7793} }
Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Banerjee_2025_ICCV, author = {Banerjee, Soumyadipta and Paik, Jiaul H. and Sen, Debashis}, title = {Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29001-29009} }
FullDiT: Video Generative Foundation Models with Multimodal Control via Full Attention-
[pdf]
[bibtex]@InProceedings{Ju_2025_ICCV, author = {Ju, Xuan and Ye, Weicai and Liu, Quande and Wang, Qiulin and Wang, Xintao and Wan, Pengfei and Zhang, Di and Gai, Kun and Xu, Qiang}, title = {FullDiT: Video Generative Foundation Models with Multimodal Control via Full Attention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15737-15747} }
Feed-Forward SceneDINO for Unsupervised Semantic Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Jevtic_2025_ICCV, author = {Jevti\'c, Aleksandar and Reich, Christoph and Wimbauer, Felix and Hahn, Oliver and Rupprecht, Christian and Roth, Stefan and Cremers, Daniel}, title = {Feed-Forward SceneDINO for Unsupervised Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6784-6796} }
V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Bowen and Wang, Yafei and Gong, Wei and Chen, Siheng and Liu, Genjia and Xiong, Minhao and Ng, Chin Long}, title = {V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28385-28395} }
VideoAuteur: Towards Long Narrative Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Junfei and Cheng, Feng and Qi, Lu and Gui, Liangke and Zhao, Yang and Lin, Shanchuan and Cen, Jiepeng and Ma, Zhibei and Yuille, Alan and Jiang, Lu}, title = {VideoAuteur: Towards Long Narrative Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19163-19173} }
Is CLIP ideal? No. Can we fix it? Yes!-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Raphi and Song, Yue and Gkioxari, Georgia and Perona, Pietro}, title = {Is CLIP ideal? No. Can we fix it? Yes!}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22436-22446} }
LaneDiffusion: Improving Centerline Graph Learning via Prior Injected BEV Feature Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zijie and Zhang, Weiming and Zhang, Wei and Tan, Xiao and Liu, Hongxing and Wang, Yaowei and Li, Guanbin}, title = {LaneDiffusion: Improving Centerline Graph Learning via Prior Injected BEV Feature Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27052-27062} }
Federated Domain Generalization with Domain-specific Soft Prompts Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jianhan and Qu, Xiaoyang and Huang, Zhangcheng and Wang, Jianzong}, title = {Federated Domain Generalization with Domain-specific Soft Prompts Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2366-2375} }
Multi-modal Identity Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Webster_2025_ICCV, author = {Webster, Ryan and Furon, Teddy}, title = {Multi-modal Identity Extraction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10797-10806} }
Height-Fidelity Dense Global Fusion for Multi-modal 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hanshi and Gao, Jin and Hu, Weiming and Zhang, Zhipeng}, title = {Height-Fidelity Dense Global Fusion for Multi-modal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26664-26674} }
DIH-CLIP: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Songsong and Yang, Xi and Wang, Nannan}, title = {DIH-CLIP: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22794-22803} }
ExploreGS: Explorable 3D Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Minsu and Jeon, Subin and Cho, In and Yoo, Mijin and Kim, Seon Joo}, title = {ExploreGS: Explorable 3D Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27042-27051} }
Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2025_ICCV, author = {Moon, WonJun and Cho, Cheol-Ho and Jun, Woojin and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Shim, Minho and Heo, Jae-Pil}, title = {Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21789-21799} }
Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yingsong and Guo, Hui and Huang, Jing and Bai, Bing and Xiong, Qi}, title = {Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17097-17107} }
SpiLiFormer: Enhancing Spiking Transformers with Lateral Inhibition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Zeqi and Huang, Yanchen and Yu, Yingchao and Zhu, Zizheng and Tang, Junfeng and Yu, Zhaofei and Jin, Yaochu}, title = {SpiLiFormer: Enhancing Spiking Transformers with Lateral Inhibition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24539-24548} }
PS-Mamba: Spatial-Temporal Graph Mamba for Pose Sequence Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Haoye and Lee, Gim Hee}, title = {PS-Mamba: Spatial-Temporal Graph Mamba for Pose Sequence Refinement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8568-8578} }
Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Xuran and Liu, Yexin and Liu, Yaofu and Wu, Xianfeng and Zheng, Mingzhe and Wang, Zihao and Lim, Ser-Nam and Yang, Harry}, title = {Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17150-17159} }
IRASim: A Fine-Grained World Model for Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Fangqi and Wu, Hongtao and Guo, Song and Liu, Yuxiao and Cheang, Chilam and Kong, Tao}, title = {IRASim: A Fine-Grained World Model for Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9834-9844} }
Diffusion-Based Imaginative Coordination for Bimanual Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Huilin and Ding, Jian and Xu, Jiakun and Wang, Ruixiang and Chen, Jun and Mai, Jinjie and Fu, Yanwei and Ghanem, Bernard and Xu, Feng and Elhoseiny, Mohamed}, title = {Diffusion-Based Imaginative Coordination for Bimanual Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11469-11479} }
Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huy_2025_ICCV, author = {Huy, Ta Duc and Huynh, Duy Anh and Xie, Yutong and Qi, Yuankai and Chen, Qi and Le Nguyen, Phi and Tran, Sen Kim and Phung, Son Lam and van den Hengel, Anton and Liao, Zhibin and To, Minh-Son and Verjans, Johan W. and Phan, Vu Minh Hieu}, title = {Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24445-24455} }
Dataset Distillation as Data Compression: A Rate-Utility Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Youneng and Liu, Yiping and Chen, Zhuo and Liang, Yongsheng and Li, Mu and Ma, Kede}, title = {Dataset Distillation as Data Compression: A Rate-Utility Perspective}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {519-529} }
GroundingSuite: Measuring Complex Multi-Granular Pixel Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Rui and Zhu, Lianghui and Zhang, Yuxuan and Cheng, Tianheng and Liu, Lei and Liu, Heng and Ran, Longjin and Chen, Xiaoxin and Liu, Wenyu and Wang, Xinggang}, title = {GroundingSuite: Measuring Complex Multi-Granular Pixel Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23105-23114} }
Consensus-Driven Active Model Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kay_2025_ICCV, author = {Kay, Justin and Van Horn, Grant and Maji, Subhransu and Sheldon, Daniel and Beery, Sara}, title = {Consensus-Driven Active Model Selection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4594-4604} }
Learning an Implicit Physics Model for Image-based Fluid Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Emily Yue-Ting and Mao, Jiageng and Gao, Zhiyuan and Zhao, Yajie and Wang, Yue}, title = {Learning an Implicit Physics Model for Image-based Fluid Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7048-7057} }
Task-Decoupled Bezier Surface Constraint for Uneven Low-Light Image Enhancement-
[pdf]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xingxiang and Su, Xiangdong and Zhang, Haoran and Chen, Wei and Gao, Guanglai}, title = {Task-Decoupled Bezier Surface Constraint for Uneven Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6859-6868} }
VideoRFSplat: Direct Scene-Level Text-to-3D Gaussian Splatting Generation with Flexible Pose and Multi-View Joint Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Go_2025_ICCV, author = {Go, Hyojun and Park, Byeongjun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick}, title = {VideoRFSplat: Direct Scene-Level Text-to-3D Gaussian Splatting Generation with Flexible Pose and Multi-View Joint Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26706-26717} }
PRISM: Reducing Spurious Implicit Biases in Vision-Language Models with LLM-Guided Embedding Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Molahasani_2025_ICCV, author = {Molahasani, Mahdiyar and Motamedi, Azadeh and Greenspan, Michael and Kim, Il-Min and Etemad, Ali}, title = {PRISM: Reducing Spurious Implicit Biases in Vision-Language Models with LLM-Guided Embedding Projection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {688-697} }
Animate Anyone 2: High-Fidelity Character Image Animation with Environment Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Li and Wang, Guangyuan and Shen, Zhen and Gao, Xin and Meng, Dechao and Zhuo, Lian and Zhang, Peng and Zhang, Bang and Bo, Liefeng}, title = {Animate Anyone 2: High-Fidelity Character Image Animation with Environment Affordance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10207-10217} }
DATA: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Chengchang and Ma, Jianwei and Huang, Yan and Chen, Zhanye and Wei, Honghao and Zhang, Hui and Hong, Wei}, title = {DATA: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28643-28652} }
LongAnimation: Long Animation Generation with Dynamic Global-Local Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Nan and Huang, Mengqi and Meng, Yihao and Mao, Zhendong}, title = {LongAnimation: Long Animation Generation with Dynamic Global-Local Memory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10032-10042} }
Is Tracking Really More Challenging in First Person Egocentric Vision?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dunnhofer_2025_ICCV, author = {Dunnhofer, Matteo and Manigrasso, Zaira and Micheloni, Christian}, title = {Is Tracking Really More Challenging in First Person Egocentric Vision?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5879-5889} }
Bringing RNNs Back to Efficient Open-Ended Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Weili and Song, Enxin and Chai, Wenhao and Wen, Xuexiang and Ye, Tian and Wang, Gaoang}, title = {Bringing RNNs Back to Efficient Open-Ended Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23453-23465} }
ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Juncan and Li, Shuaiting and Wang, Zeyu and Xu, Kedong and Gu, Hong and Huang, Kejie}, title = {ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24518-24527} }
TorchAdapt: Towards Light-Agnostic Real-Time Visual Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Hashmi_2025_ICCV, author = {Hashmi, Khurram Azeem and Suresh, Karthik Palyakere and Stricker, Didier and Afzal, Muhammad Zeshan}, title = {TorchAdapt: Towards Light-Agnostic Real-Time Visual Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5645-5656} }
UniversalBooth: Model-Agnostic Personalized Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Songhua and Yu, Ruonan and Wang, Xinchao}, title = {UniversalBooth: Model-Agnostic Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18314-18324} }
EVEv2: Improved Baselines for Encoder-Free Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Diao_2025_ICCV, author = {Diao, Haiwen and Li, Xiaotong and Cui, Yufeng and Wang, Yueze and Deng, Haoge and Pan, Ting and Wang, Wenxuan and Lu, Huchuan and Wang, Xinlong}, title = {EVEv2: Improved Baselines for Encoder-Free Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21014-21025} }
End-to-End Driving with Online Trajectory Evaluation via BEV World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yingyan and Wang, Yuqi and Liu, Yang and He, Jiawei and Fan, Lue and Zhang, Zhaoxiang}, title = {End-to-End Driving with Online Trajectory Evaluation via BEV World Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27137-27146} }
EEGMirror: Leveraging EEG Data in the Wild via Montage-Agnostic Self-Supervision for EEG to Video Decoding-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xuan-Hao and Lu, Bao-Liang and Zheng, Wei-Long}, title = {EEGMirror: Leveraging EEG Data in the Wild via Montage-Agnostic Self-Supervision for EEG to Video Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18273-18283} }
Edit360: 2D Image Edits to 3D Assets from Any Angle-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Junchao and Hu, Xinting and Shi, Shaoshuai and Tian, Zhuotao and Jiang, Li}, title = {Edit360: 2D Image Edits to 3D Assets from Any Angle}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16618-16628} }
VehicleMAE: View-asymmetry Mutual Learning for Vehicle Re-identification Pre-training via Masked AutoEncoders-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Qi and Zhang, Zeyu and Wang, Dong and Gai, Di and Xiong, Xin and Xu, Jiyang and Zhou, Ruihua}, title = {VehicleMAE: View-asymmetry Mutual Learning for Vehicle Re-identification Pre-training via Masked AutoEncoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4701-4711} }
MiDSummer: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive Gaussian Splatting Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Anjun and Tomsett, Richard and Gourmet, Valentin and Camplani, Massimo and Kandola, Jas and Xie, Hanting}, title = {MiDSummer: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive Gaussian Splatting Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26793-26805} }
AV-Link: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Haji-Ali_2025_ICCV, author = {Haji-Ali, Moayed and Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Canberk, Alper and Lee, Kwot Sin and Ordonez, Vicente and Tulyakov, Sergey}, title = {AV-Link: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19373-19385} }
RANKCLIP: Ranking-Consistent Language-Image Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Feng, Zhili and Ding, Zenghui and Sun, Yining}, title = {RANKCLIP: Ranking-Consistent Language-Image Pretraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3874-3884} }
Is Visual in-Context Learning for Compositional Medical Tasks within Reach?-
[pdf]
[supp]
[bibtex]@InProceedings{Reiss_2025_ICCV, author = {Rei{\ss}, Simon and Marinov, Zdravko and Jaus, Alexander and Seibold, Constantin and Sarfraz, M. Saquib and Rodner, Erik and Stiefelhagen, Rainer}, title = {Is Visual in-Context Learning for Compositional Medical Tasks within Reach?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2642-2652} }
Details Matter for Indoor Open-vocabulary 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Sanghun and Zheng, Jingjing and Zhang, Ke and Qiao, Nan and Chen, Albert Y. C. and Xia, Lu and Liu, Chi and Sun, Yuyin and Zeng, Xiao and Huang, Hsiang-Wei and Boots, Byron and Sun, Min and Kuo, Cheng-Hao}, title = {Details Matter for Indoor Open-vocabulary 3D Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9627-9637} }
Differentially Private Fine-Tuning of Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tsai_2025_ICCV, author = {Tsai, Yu-Lin and Li, Yizhe and Yu, Chia-Mu and Ren, Xuebin and Chen, Po-Yu and Chen, Zekai and Buet-Golfouse, Francois}, title = {Differentially Private Fine-Tuning of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4561-4571} }
Large Learning Rates Simultaneously Achieve Robustness to Spurious Correlations and Compressibility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barsbey_2025_ICCV, author = {Barsbey, Melih and Prieto, Lucas and Zafeiriou, Stefanos and Birdal, Tolga}, title = {Large Learning Rates Simultaneously Achieve Robustness to Spurious Correlations and Compressibility}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2055-2066} }
Global Regulation and Excitation via Attention Tuning for Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiahao and Chen, Xinhong and Jiang, Zhengmin and Zhou, Qian and Li, Yung-Hui and Wang, Jianping}, title = {Global Regulation and Excitation via Attention Tuning for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25539-25549} }
TruthPrInt: Mitigating Large Vision-Language Models Object Hallucination Via Latent Truthful-Guided Pre-Intervention-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Jinhao and Kong, Fei and Cheng, Hao and Diffenderfer, James and Kailkhura, Bhavya and Sun, Lichao and Zhu, Xiaofeng and Shi, Xiaoshuang and Xu, Kaidi}, title = {TruthPrInt: Mitigating Large Vision-Language Models Object Hallucination Via Latent Truthful-Guided Pre-Intervention}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7372-7382} }
Customizing Domain Adapters for Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Yuyang and Huang, Zeyi and Wang, Haohan and Lee, Yong Jae}, title = {Customizing Domain Adapters for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {934-944} }
ADCD-Net: Robust Document Image Forgery Localization via Adaptive DCT Feature and Hierarchical Content Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Wong_2025_ICCV, author = {Wong, Kahim and Zhou, Jicheng and Wu, Haiwei and Si, Yain-Whar and Zhou, Jiantao}, title = {ADCD-Net: Robust Document Image Forgery Localization via Adaptive DCT Feature and Hierarchical Content Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19280-19289} }
STaR: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Xiaohang and Wang, Qing and Yang, Jiahao and Slabaugh, Gregory and Yuan, Shanxin}, title = {STaR: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12947-12955} }
Structured Policy Optimization: Enhance Large Vision-Language Model via Self-referenced Dialogue-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Guohao and Qin, Can and Feng, Yihao and Chen, Zeyuan and Xu, Ran and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang}, title = {Structured Policy Optimization: Enhance Large Vision-Language Model via Self-referenced Dialogue}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {741-751} }
UnrealZoo: Enriching Photo-realistic Virtual Worlds for Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Fangwei and Wu, Kui and Wang, Churan and Chen, Hao and Ci, Hai and Li, Zhoujun and Wang, Yizhou}, title = {UnrealZoo: Enriching Photo-realistic Virtual Worlds for Embodied AI}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5769-5779} }
Visual Test-time Scaling for GUI Agent Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Tiange and Logeswaran, Lajanugen and Johnson, Justin and Lee, Honglak}, title = {Visual Test-time Scaling for GUI Agent Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19989-19998} }
One-Step Specular Highlight Removal with Adapted Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Atmis_2025_ICCV, author = {Atmis, Mahir and Karacan, Levent and Sar{\i}g\"ul, Mehmet}, title = {One-Step Specular Highlight Removal with Adapted Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16313-16322} }
GraspCoT: Integrating Physical Property Reasoning for 6-DoF Grasping under Flexible Language Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2025_ICCV, author = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Liu, Wei and Li, Xingchen and Ji, Jianmin and Zhang, Yanyong}, title = {GraspCoT: Integrating Physical Property Reasoning for 6-DoF Grasping under Flexible Language Instructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10130-10140} }
Deep Space Weather Model: Long-Range Solar Flare Prediction from Multi-Wavelength Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagashima_2025_ICCV, author = {Nagashima, Shunya and Sugiura, Komei}, title = {Deep Space Weather Model: Long-Range Solar Flare Prediction from Multi-Wavelength Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9396-9405} }
Hydra-NeXt: Robust Closed-Loop Driving with Open-Loop Training-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhenxin and Wang, Shihao and Lan, Shiyi and Yu, Zhiding and Wu, Zuxuan and Alvarez, Jose M.}, title = {Hydra-NeXt: Robust Closed-Loop Driving with Open-Loop Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27305-27314} }
Zero-Shot Vision Encoder Grafting via LLM Surrogates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2025_ICCV, author = {Yue, Kaiyu and Singla, Vasu and Jia, Menglin and Kirchenbauer, John and Qadri, Rifaa and Cai, Zikui and Bhatele, Abhinav and Huang, Furong and Goldstein, Tom}, title = {Zero-Shot Vision Encoder Grafting via LLM Surrogates}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4275-4284} }
Depth Any Event Stream: Enhancing Event-based Monocular Depth Estimation via Dense-to-Sparse Distillation-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Jinjing and Pan, Tianbo and Cao, Zidong and Liu, Yexin and Kwok, James T. and Xiong, Hui}, title = {Depth Any Event Stream: Enhancing Event-based Monocular Depth Estimation via Dense-to-Sparse Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5146-5155} }
Uncover Treasures in DCT: Advancing JPEG Quality Enhancement by Exploiting Latent Correlations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Jing and Xing, Qunliang and Xu, Mai and Qiao, Minglang}, title = {Uncover Treasures in DCT: Advancing JPEG Quality Enhancement by Exploiting Latent Correlations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17598-17607} }
Layer-wise Vision Injection with Disentangled Attention for Efficient LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xuange and Li, Dengjie and Liu, Bo and Bao, Zenghao and Zhou, Yao and Yang, Baisong and Liu, Zhongying and Zhong, Yujie and Yuan, Tongtong}, title = {Layer-wise Vision Injection with Disentangled Attention for Efficient LVLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7004-7013} }
Heatmap Regression without Soft-Argmax for Facial Landmark Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Chiao-An and Yeh, Raymond A.}, title = {Heatmap Regression without Soft-Argmax for Facial Landmark Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28729-28739} }
ICE-Bench: A Unified and Comprehensive Benchmark for Image Creating and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Yulin and He, Xiangteng and Mao, Chaojie and Han, Zhen and Jiang, Zeyinzi and Zhang, Jingfeng and Liu, Yu}, title = {ICE-Bench: A Unified and Comprehensive Benchmark for Image Creating and Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16586-16596} }
Unified Adversarial Augmentation for Improving Palmprint Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Zhao, Yang and Wang, Jun and Zhang, Jingyun and Ding, Shouhong and Jia, Wei and Wu, Yunsheng}, title = {Unified Adversarial Augmentation for Improving Palmprint Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14141-14151} }
PlugMark: A Plug-in Zero-Watermarking Framework for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Liu, Enci and Shang, Zhuoyi and Ji, Xiangyang and Liu, Wu}, title = {PlugMark: A Plug-in Zero-Watermarking Framework for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17335-17345} }
FLSeg: Enhancing Privacy and Robustness in Federated Learning under Heterogeneous Data via Model Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Zichun and Lu, Zhi and Wu, Yutong and Shen, Renfei and Lu, Songfeng}, title = {FLSeg: Enhancing Privacy and Robustness in Federated Learning under Heterogeneous Data via Model Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3916-3925} }
Partially Matching Submap Helps: Uncertainty Modeling and Propagation for Text to Point Cloud Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Mingtao and Mei, Longlong and Wu, Zijie and Luo, Jianqiao and Tian, Fenghao and Feng, Jie and Dong, Weisheng and Wang, Yaonan}, title = {Partially Matching Submap Helps: Uncertainty Modeling and Propagation for Text to Point Cloud Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8296-8305} }
Human-Object Interaction from Human-Level Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Zhen and Li, Jiaman and Xu, Pei and Liu, C. Karen}, title = {Human-Object Interaction from Human-Level Instructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11176-11186} }
SAM2Long: Enhancing SAM 2 for Long Video Segmentation with a Training-Free Memory Tree-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Shuangrui and Qian, Rui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Guo, Yuwei and Lin, Dahua and Wang, Jiaqi}, title = {SAM2Long: Enhancing SAM 2 for Long Video Segmentation with a Training-Free Memory Tree}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13614-13624} }
Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Xiao and Jeon, Minhyek and Qin, Zheyang and Panev, Stanislav and De Melo, Celso and Hu, Shuowen and Chakraborty, Shayok and De La Torre, Fernando}, title = {Adapting Vehicle Detectors for Aerial Imagery to Unseen Domains with Weak Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8088-8099} }
Neural Shell Texture Splatting: More Details and Fewer Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xin and Chen, Anpei and Xiong, Jincheng and Dai, Pinxuan and Shen, Yujun and Xu, Weiwei}, title = {Neural Shell Texture Splatting: More Details and Fewer Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25229-25238} }
3DSRBench: A Comprehensive 3D Spatial Reasoning Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Wufei and Chen, Haoyu and Zhang, Guofeng and Chou, Yu-Cheng and Chen, Jieneng and de Melo, Celso and Yuille, Alan}, title = {3DSRBench: A Comprehensive 3D Spatial Reasoning Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6924-6934} }
Causality-guided Prompt Learning for Vision-language Models via Visual Granulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Mengyu and Dong, Qiulei}, title = {Causality-guided Prompt Learning for Vision-language Models via Visual Granulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1141-1151} }
Arti-PG: A Toolbox for Procedurally Synthesizing Large-Scale and Diverse Articulated Objects with Rich Annotations-
[pdf]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Jianhua and Li, Yuxuan and Wei, Jiude and Xu, Longfei and Wang, Nange and Zhang, Yining and Lu, Cewu}, title = {Arti-PG: A Toolbox for Procedurally Synthesizing Large-Scale and Diverse Articulated Objects with Rich Annotations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6396-6405} }
X2I: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Jian and Peng, Qirong and Guo, Xu and Chen, Chen and Lu, Haonan and Yang, Zhenyu}, title = {X2I: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16733-16744} }
One Last Attention for Your Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Liang and Ahmad, Ghazi Shazan and Yao, Tianjun and Liu, Lingqiao and Shen, Zhiqiang}, title = {One Last Attention for Your Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1464-1473} }
MMCR: Benchmarking Cross-Source Reasoning in Scientific Papers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Yang and Lu, Zheng and Gao, Mingqi and Liu, Zheng and Zhao, Bo}, title = {MMCR: Benchmarking Cross-Source Reasoning in Scientific Papers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {488-497} }
ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_ICCV, author = {Gong, Bingchen and Gomez, Diego and Hamdi, Abdullah and Eldesokey, Abdelrahman and Abdelreheem, Ahmed and Wonka, Peter and Ovsjanikov, Maks}, title = {ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22089-22099} }
Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Fa-Ting and Xu, Zunnan and Zhou, Zixiang and Zhou, Jun and Li, Xiu and Lin, Qin and Lu, Qinglin and Xu, Dan}, title = {Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12549-12558} }
DreamCube: RGB-D Panorama Generation via Multi-plane Synchronization-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yukun and Zhou, Yanning and Wang, Jianan and Huang, Kaiyi and Liu, Xihui}, title = {DreamCube: RGB-D Panorama Generation via Multi-plane Synchronization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24922-24932} }
SILO: Solving Inverse Problems with Latent Operators-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Raphaeli_2025_ICCV, author = {Raphaeli, Ron and Man, Sean and Elad, Michael}, title = {SILO: Solving Inverse Problems with Latent Operators}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10570-10580} }
Entropy-Adaptive Diffusion Policy Optimization with Dynamic Step Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, RenYe and Cheng, Jikang and Gan, Yaozhong and Sun, Shikun and Wu, You and Yang, Yunfan and Ling, Liang and Lin, Jinlong and Zhu, Yeshuang and Zhou, Jie and Zhang, Jinchao and Xing, Junliang and Cai, Yimao and Huang, Ru}, title = {Entropy-Adaptive Diffusion Policy Optimization with Dynamic Step Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1924-1934} }
ScanEdit: Hierarchically-Guided Functional 3D Scan Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{El_Amine_Boudjoghra_2025_ICCV, author = {El Amine Boudjoghra, Mohamed and Laptev, Ivan and Dai, Angela}, title = {ScanEdit: Hierarchically-Guided Functional 3D Scan Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27105-27115} }
DeFSS: Image-to-Mask Denoising Learning for Few-shot Segmentation-
[pdf]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Zishu and Xu, Junhao and Ge, Weifeng}, title = {DeFSS: Image-to-Mask Denoising Learning for Few-shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22232-22240} }
Bokehlicious: Photorealistic Bokeh Rendering with Controllable Apertures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seizinger_2025_ICCV, author = {Seizinger, Tim and Vasluianu, Florin-Alexandru and Conde, Marcos V. and Wu, Zongwei and Timofte, Radu}, title = {Bokehlicious: Photorealistic Bokeh Rendering with Controllable Apertures}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8908-8917} }
Turbo2K: Towards Ultra-Efficient and High-Quality 2K Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Jingjing and Li, Wenbo and Wang, Zhongdao and Sun, Haoze and Liu, Bangzhen and Chen, Haoyu and Xu, Jiaqi and Li, Aoxue and Zhang, Shifeng and Shao, Bin and Guo, Yong and Zhu, Lei}, title = {Turbo2K: Towards Ultra-Efficient and High-Quality 2K Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18155-18165} }
Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2025_ICCV, author = {Ju, Hao and Huang, Shaofei and Liu, Si and Zheng, Zhedong}, title = {Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27073-27083} }
Puppet-Master: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ruining and Zheng, Chuanxia and Rupprecht, Christian and Vedaldi, Andrea}, title = {Puppet-Master: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13405-13415} }
MMAT-1M: A Large Reasoning Dataset for Multimodal Agent Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Tianhong and Fu, Yannian and Wu, Weiqun and Yue, Haixiao and Liu, Shanshan and Zhang, Gang},
  title     = {{MMAT-1M}: A Large Reasoning Dataset for Multimodal Agent Tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1484--1494}
}
FlowChef: Steering of Rectified Flow Models for Controlled Generations-
[pdf]
[supp]
[bibtex]@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Maitreya and Wen, Song and Metaxas, Dimitris N. and Yang, Yezhou},
  title     = {{FlowChef}: Steering of Rectified Flow Models for Controlled Generations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15308--15318}
}
Teaching VLMs to Localize Specific Objects from In-context Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Doveh_2025_ICCV,
  author    = {Doveh, Sivan and Shabtay, Nimrod and Schwartz, Eli and Kuehne, Hilde and Giryes, Raja and Feris, Rogerio and Karlinsky, Leonid and Glass, James and Arbelle, Assaf and Ullman, Shimon and Mirza, M. Jehanzeb},
  title     = {Teaching {VLMs} to Localize Specific Objects from In-context Examples},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9572--9582}
}
A Hidden Stumbling Block in Generalized Category Discovery: Distracted Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Qiyu and Hu, Zhanxuan and Duan, Yu and Pei, Ercheng and Tai, Yonghang},
  title     = {A Hidden Stumbling Block in Generalized Category Discovery: Distracted Attention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {405--414}
}
High-Precision 3D Measurement of Complex Textured Surfaces Using Multiple Filtering Approach-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuchong and Yu, Jian and Gai, Shaoyan and Cai, Zeyu and Da, Feipeng},
  title     = {High-Precision {3D} Measurement of Complex Textured Surfaces Using Multiple Filtering Approach},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25670--25679}
}
Efficient Unsupervised Shortcut Learning Detection and Mitigation in Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Kuhn_2025_ICCV,
  author    = {Kuhn, Lukas and Sadiya, Sari and Schl{\"o}tterer, J{\"o}rg and Buettner, Florian and Seifert, Christin and Roig, Gemma},
  title     = {Efficient Unsupervised Shortcut Learning Detection and Mitigation in Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2217--2226}
}
Adaptive Caching for Faster Video Generation with Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kahatapitiya_2025_ICCV,
  author    = {Kahatapitiya, Kumara and Liu, Haozhe and He, Sen and Liu, Ding and Jia, Menglin and Zhang, Chenyang and Ryoo, Michael S. and Xie, Tian},
  title     = {Adaptive Caching for Faster Video Generation with Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15240--15252}
}
RGE-GS: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Sicong and Liu, Jiarun and Chen, Qifeng and Chen, Hao-Xiang and Mu, Tai-Jiang and Yang, Sheng},
  title     = {{RGE-GS}: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25756--25764}
}
EgoMusic-driven Human Dance Motion Estimation with Skeleton Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Quang and Le, Nhat and Huang, Baoru and Vu, Minh Nhat and Tang, Chengcheng and Nguyen, Van and Le, Ngan and Vo, Thieu and Nguyen, Anh},
  title     = {{EgoMusic-driven} Human Dance Motion Estimation with Skeleton {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12023--12033}
}
OmniCache: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Huanpeng and Wu, Wei and Feng, Guanyu and Zhang, Yutao},
  title     = {{OmniCache}: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16302--16312}
}
VIPerson: Flexibly Generating Virtual Identity for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao-Wen and Zhang, Delong and Peng, Yi-Xing and Ouyang, Zhi and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{VIPerson}: Flexibly Generating Virtual Identity for Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23374--23384}
}
St4RTrack: Simultaneous 4D Reconstruction and Tracking in the World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Haiwen and Zhang, Junyi and Wang, Qianqian and Ye, Yufei and Yu, Pengcheng and Black, Michael J. and Darrell, Trevor and Kanazawa, Angjoo},
  title     = {{St4RTrack}: Simultaneous {4D} Reconstruction and Tracking in the World},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8503--8513}
}
PRM: Photometric Stereo based Large Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Wenhang and Lin, Jiantao and Shen, Guibao and Feng, Jiawei and Hu, Tao and Xu, Xinli and Chen, Ying-Cong},
  title     = {{PRM}: Photometric Stereo based Large Reconstruction Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25009--25018}
}
Exploiting Frequency Dynamics for Enhanced Multimodal Event-based Action Recognition-
[pdf]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Meiqi and Shu, Xiangbo and Jiang, Xin and Yan, Rui and Yao, Yazhou and Tang, Jinhui},
  title     = {Exploiting Frequency Dynamics for Enhanced Multimodal Event-based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5969--5979}
}
AerialVG: A Challenging Benchmark for Aerial Visual Grounding by Exploring Positional Relations-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Junli and Chen, Qizhi and Wang, Zhigang and Tang, Yiwen and Zhang, Yiting and Yan, Chi and Wang, Dong and Li, Xuelong and Zhao, Bin},
  title     = {{AerialVG}: A Challenging Benchmark for Aerial Visual Grounding by Exploring Positional Relations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5177--5187}
}
DriveArena: A Closed-loop Generative Simulation Platform for Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xuemeng and Wen, Licheng and Wei, Tiantian and Ma, Yukai and Mei, Jianbiao and Li, Xin and Lei, Wenjie and Fu, Daocheng and Cai, Pinlong and Dou, Min and He, Liang and Liu, Yong and Shi, Botian and Qiao, Yu},
  title     = {{DriveArena}: A Closed-loop Generative Simulation Platform for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26933--26943}
}
InvRGB+L: Inverse Rendering of Complex Scenes with Unified Color and LiDAR Reflectance Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaoxue and Chandaka, Bhargav and Lin, Chih-Hao and Zhang, Ya-Qin and Forsyth, David and Zhao, Hao and Wang, Shenlong},
  title     = {{InvRGB+L}: Inverse Rendering of Complex Scenes with Unified Color and {LiDAR} Reflectance Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27176--27186}
}
DreamFuse: Adaptive Image Fusion with Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junjia and Yan, Pengxiang and Liu, Jiyang and Wu, Jie and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamFuse}: Adaptive Image Fusion with Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17292--17301}
}
DALIP: Distribution Alignment-based Language-Image Pre-Training for Domain-Specific Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Junjie and Xie, Jiangtao and Zhang, Zhaolin and Wang, Qilong and Hu, Qinghua and Li, Peihua and Xu, Sen},
  title     = {{DALIP}: Distribution Alignment-based Language-Image Pre-Training for Domain-Specific Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2099--2109}
}
Joint Semantic and Rendering Enhancements in 3D Gaussian Modeling with Anisotropic Local Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Jingming and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {Joint Semantic and Rendering Enhancements in {3D} {Gaussian} Modeling with Anisotropic Local Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28354--28363}
}
Unveiling the Invisible: Reasoning Complex Occlusions Amodally with AURA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Yoon, Hyunse and Lee, Sanghoon and Lin, Weisi},
  title     = {Unveiling the Invisible: Reasoning Complex Occlusions Amodally with {AURA}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21927--21937}
}
AGO: Adaptive Grounding for Open World 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Peizheng and Ding, Shuxiao and Zhou, You and Zhang, Qingwen and Inak, Onat and Triess, Larissa and Hanselmann, Niklas and Cordts, Marius and Zell, Andreas},
  title     = {{AGO}: Adaptive Grounding for Open World {3D} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8645--8655}
}
Free2Guide: Training-Free Text-to-Video Alignment using Image LVLM-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaemin and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{Free2Guide}: Training-Free Text-to-Video Alignment using Image {LVLM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17920--17929}
}
T2I-Copilot: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chieh-Yun and Shi, Min and Zhang, Gong and Shi, Humphrey},
  title     = {{T2I-Copilot}: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19396--19405}
}
Lumina-Image 2.0: A Unified and Efficient Image Generative Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Qi and Zhuo, Le and Xin, Yi and Du, Ruoyi and Li, Zhen and Fu, Bin and Lu, Yiting and Li, Xinyue and Liu, Dongyang and Zhu, Xiangyang and Beddow, Will and Millon, Erwann and Perez, Victor and Wang, Wenhai and Qiao, Yu and Zhang, Bo and Liu, Xiaohong and Li, Hongsheng and Xu, Chang and Gao, Peng},
  title     = {{Lumina-Image} 2.0: A Unified and Efficient Image Generative Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20031--20042}
}
mmCooper: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bingyi and Teng, Jian and Xue, Hongfei and Wang, Enshu and Zhu, Chuanhui and Wang, Pu and Wu, Libing},
  title     = {{mmCooper}: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28396--28406}
}
LightBSR: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Jiang and Ma, Ji and Wang, Bo and Ke, Guanzhou and Hu, Weiming},
  title     = {{LightBSR}: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11927--11936}
}
Harnessing Uncertainty-aware Bounding Boxes for Unsupervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ruiyang and Zhang, Hu and Zheng, Zhedong},
  title     = {Harnessing Uncertainty-aware Bounding Boxes for Unsupervised {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9230--9240}
}
OCSplats: Observation Completeness Quantification and Label Noise Separation in 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Han and Xu, Xian and Sun, Yinghui and Sun, Quansen},
  title     = {{OCSplats}: Observation Completeness Quantification and Label Noise Separation in {3DGS}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25680--25689}
}
One Encoder to Rule them All: Representation Learning for Model-free Visual Reinforcement Learning using Fourier Neural Operators-
[pdf]
[bibtex]@InProceedings{Dutta_2025_ICCV,
  author    = {Dutta, Parag and Ayyoob, Mohd and Bhatnagar, Shalabh and Dukkipati, Ambedkar},
  title     = {One Encoder to Rule them All: Representation Learning for Model-free Visual Reinforcement Learning using {Fourier} Neural Operators},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4818--4827}
}
LOMM: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunghun and Seo, Jiwan and Choi, Minwoo and Han, Kiljoon and Jeong, Jahoon and Durante, Zane and Adeli, Ehsan and Park, Sang Hyun and Im, Sunghoon},
  title     = {{LOMM}: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13719--13729}
}
Open-Vocabulary HOI Detection with Interaction-aware Prompt and Concept Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Ting and Yin, Shaofeng and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {Open-Vocabulary {HOI} Detection with Interaction-aware Prompt and Concept Calibration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23945--23957}
}
DuCos: Duality Constrained Depth Super-Resolution via Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zhiqiang and Wang, Zhengxue and Dong, Haoye and Li, Jun and Yang, Jian and Lee, Gim Hee},
  title     = {{DuCos}: Duality Constrained Depth Super-Resolution via Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8361--8371}
}
ROVI: A VLM-LLM Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Cihang and Hou, Qiming and Ren, Zhong and Zhou, Kun},
  title     = {{ROVI}: A {VLM-LLM} Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20204--20214}
}
PBFG: A New Physically-Based Dataset and Removal of Lens Flares and Glares-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jie and Lee, Sungkil},
  title     = {{PBFG}: A New Physically-Based Dataset and Removal of Lens Flares and Glares},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5448--5457}
}
Active Perception Meets Rule-Guided RL: A Two-Phase Approach for Precise Object Navigation in Complex Environments-
[pdf]
[bibtex]@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Liang and Wang, Min and Li, Peiwei and Zhou, Wengang and Li, Houqiang},
  title     = {Active Perception Meets Rule-Guided {RL}: A Two-Phase Approach for Precise Object Navigation in Complex Environments},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7603--7612}
}
CAT: A Unified Click-and-Track Framework for Realistic Tracking-
[pdf]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Yongsheng and Zhao, Jie and Wang, Dong and Lu, Huchuan},
  title     = {{CAT}: A Unified Click-and-Track Framework for Realistic Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5690--5700}
}
Differential-informed Sample Selection Accelerates Multimodal Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Zihua and Hong, Feng and Chen, Mengxi and Chen, Pengyi and Liu, Benyuan and Yao, Jiangchao and Zhang, Ya and Wang, Yanfeng},
  title     = {Differential-informed Sample Selection Accelerates Multimodal Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2930--2940}
}
A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Aoxiang and Dumery, Corentin and Talabot, Nicolas and Fua, Pascal},
  title     = {A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25961--25971}
}
LUT-Fuse: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Xunpeng and Zhang, Yibing and Xiang, Xinyu and Yan, Qinglong and Xu, Han and Ma, Jiayi},
  title     = {{LUT-Fuse}: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14559--14568}
}
AdsQA: Towards Advertisement Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_ICCV,
  author    = {Long, Xinwei and Tian, Kai and Xu, Peng and Jia, Guoli and Li, Jingxuan and Yang, Sa and Shao, Yihua and Zhang, Kaiyan and Jiang, Che and Xu, Hao and Liu, Yang and Ma, Jiaheng and Zhou, Bowen},
  title     = {{AdsQA}: Towards Advertisement Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23396--23407}
}
Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiawen and Ong, Yew-Soon and Shen, Chunhua and Pang, Guansong},
  title     = {Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22241--22251}
}
Learning Dense Feature Matching via Lifting Single 2D Image to 3D Space-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying},
  title     = {Learning Dense Feature Matching via Lifting Single {2D} Image to {3D} Space},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6621--6631}
}
DNF-Intrinsic: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Rongjia and Zhang, Qing and Long, Chengjiang and Zheng, Wei-Shi},
  title     = {{DNF-Intrinsic}: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10342--10352}
}
Unified Open-World Segmentation with Multi-Modal Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Yin, Yufei and Jing, Chenchen and Zhu, Muzhi and Chen, Hao and Xi, Yuling and Feng, Bo and Wang, Hao and Li, Shiyu and Shen, Chunhua},
  title     = {Unified Open-World Segmentation with Multi-Modal Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21557--21567}
}
GausSim: Foreseeing Reality by Gaussian Simulator for Elastic Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Yidi and Huang, Mu and Loy, Chen Change and Dai, Bo},
  title     = {{GausSim}: Foreseeing Reality by {Gaussian} Simulator for Elastic Objects},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {7841--7850}
}
ClaraVid: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beche_2025_ICCV,
  author    = {Beche, Radu and Nedevschi, Sergiu},
  title     = {{ClaraVid}: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26015--26025}
}
TextMaster: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zhenyu and Wang, Jian and Wang, Aoqiang and Li, Yuhan and Shang, Wenxiang and Hangcheng, Zhu},
  title     = {{TextMaster}: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16112--16121}
}
X2-Gaussian: 4D Radiative Gaussian Splatting for Continuous-time Tomographic Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Weihao and Cai, Yuanhao and Zha, Ruyi and Fan, Zhiwen and Li, Chenxin and Yuan, Yixuan},
  title     = {{X2-Gaussian}: {4D} Radiative {Gaussian} Splatting for Continuous-time Tomographic Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24728--24738}
}
FrameFusion: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tianyu and Liu, Tengxuan and Han, Qinghao and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Ning, Xuefei and Wang, Yu},
  title     = {{FrameFusion}: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22654--22663}
}
AURELIA: Test-time Reasoning Distillation in Audio-Visual LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Gani, Hanan and Anand, Nishit and Nag, Sayan and Gao, Ruohan and Elhoseiny, Mohamed and Khan, Salman and Manocha, Dinesh},
  title     = {{AURELIA}: Test-time Reasoning Distillation in Audio-Visual {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22899--22910}
}
Unraveling the Smoothness Properties of Diffusion Models: A Gaussian Mixture Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yingyu and Sha, Zhizhou and Shi, Zhenmei and Song, Zhao and Wan, Mingda and Zhou, Yufa},
  title     = {Unraveling the Smoothness Properties of Diffusion Models: A {Gaussian} Mixture Perspective},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11436--11446}
}
FB-Diff: Fourier Basis-guided Diffusion for Temporal Interpolation of 4D Medical Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_ICCV,
  author    = {You, Xin and Yang, Runze and Zhang, Chuyan and Jiang, Zhongliang and Yang, Jie and Navab, Nassir},
  title     = {{FB-Diff}: {Fourier} Basis-guided Diffusion for Temporal Interpolation of {4D} Medical Imaging},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28010--28020}
}
UNIS: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Junkai and Niu, Hanting and Li, Jiaze and Hou, Fei and He, Ying},
  title     = {{UNIS}: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27671--27680}
}
Scendi Score: Prompt-Aware Diversity Evaluation via Schur Complement of CLIP Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ospanov_2025_ICCV,
  author    = {Ospanov, Azim and Jalali, Mohammad and Farnia, Farzan},
  title     = {{Scendi} Score: Prompt-Aware Diversity Evaluation via {Schur} Complement of {CLIP} Embeddings},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16927--16937}
}
Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Peng and Bai, Tian and Sun, Jing and Sun, Fuming},
  title     = {Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23657--23666}
}
HOLa: Zero-Shot HOI Detection with Low-Rank Decomposed VLM Feature Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Qinqian and Wang, Bo and Tan, Robby T.},
  title     = {{HOLa}: Zero-Shot {HOI} Detection with Low-Rank Decomposed {VLM} Feature Adaptation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1825--1835}
}
Context-Aware Academic Emotion Dataset and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Luming and Xuan, Jingwen and Lou, Jiamin and Yu, Yonghui and Yang, Wenwu},
  title     = {Context-Aware Academic Emotion Dataset and Benchmark},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13859--13868}
}
Contrastive Test-Time Composition of Multiple LoRA Models for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meral_2025_ICCV,
  author    = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar},
  title     = {Contrastive Test-Time Composition of Multiple {LoRA} Models for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18090--18100}
}
DiffRefine: Diffusion-based Proposal Specific Point Cloud Densification for Cross-Domain Object Detection-
[pdf]
[bibtex]@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Sangyun and He, Yuhang and Hou, Xinyu and Hodgson, Samuel and Markham, Andrew and Trigoni, Niki},
  title     = {{DiffRefine}: Diffusion-based Proposal Specific Point Cloud Densification for Cross-Domain Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4888--4897}
}
Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Juan and Fan, Shaojing and Sim, Terence},
  title     = {Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14517--14527}
}
Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kecheng and Luo, Xinyu and Qin, Tiexin and Liu, Jie and Liu, Hui and Lee, Victor Ho Fun and Yan, Hong and Li, Haoliang},
  title     = {Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20075--20084}
}
Hierarchy UGP: Hierarchy Unified Gaussian Primitive for Large-Scale Dynamic Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Hongyang and Yang, Qinglin and Wang, Jiawei and Xu, Zhen and Liu, Chen and Wang, Yida and Zhan, Kun and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {Hierarchy {UGP}: Hierarchy Unified {Gaussian} Primitive for Large-Scale Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26252--26262}
}
SMSTracker: Tri-path Score Mask Sigma Fusion for Multi-Modal Tracking-
[pdf]
[bibtex]@InProceedings{Chan_2025_ICCV,
  author    = {Chan, Sixian and Li, Zedong and Li, Wenhao and Lu, Shijian and Shen, Chunhua and Zhang, Xiaoqin},
  title     = {{SMSTracker}: Tri-path Score Mask Sigma Fusion for Multi-Modal Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4766--4775}
}
Distilling Parallel Gradients for Fast ODE Solvers of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Beier and Wang, Ruoyu and Zhao, Tong and Zhang, Hanwang and Zhang, Chi},
  title     = {Distilling Parallel Gradients for Fast {ODE} Solvers of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19557--19566}
}
Aligning Constraint Generation with Design Intent in Parametric CAD-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Casey_2025_ICCV, author = {Casey, Evan and Zhang, Tianyu and Ishida, Shu and Thompson, John Roger and Khasahmadi, Amir and Lambourne, Joseph George and Jayaraman, Pradeep Kumar and Willis, Karl D.D.}, title = {Aligning Constraint Generation with Design Intent in Parametric CAD}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8613-8622} }
DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiahe and Zheng, Rongkun and Wang, Yi and Wang, Helin and Zhao, Hengshuang}, title = {DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21710-21720} }
PUMA: Empowering Unified MLLM with Multi-granular Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Rongyao and Duan, Chengqi and Wang, Kun and Li, Hao and Huang, Linjiang and Tian, Hao and Zeng, Xingyu and Zhao, Rui and Dai, Jifeng and Li, Hongsheng and Liu, Xihui}, title = {PUMA: Empowering Unified MLLM with Multi-granular Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15447-15457} }
Axis-level Symmetry Detection with Group-Equivariant Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Wongyun and Seo, Ahyun and Cho, Minsu}, title = {Axis-level Symmetry Detection with Group-Equivariant Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24791-24800} }
SMP-Attack: Boosting the Transferability of Feature Importance-based Adversarial Attack with Semantics-aware Multi-granularity Patchout-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Wen and Liu, Guodong and Ming, Di}, title = {SMP-Attack: Boosting the Transferability of Feature Importance-based Adversarial Attack with Semantics-aware Multi-granularity Patchout}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4444-4454} }
Referring to Any Person-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Qing and Wu, Lin and Zeng, Zhaoyang and Ren, Tianhe and Xiong, Yuda and Chen, Yihao and Qin, Liu and Zhang, Lei}, title = {Referring to Any Person}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21667-21678} }
GWM: Towards Scalable Gaussian World Models for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Guanxing and Jia, Baoxiong and Li, Puhao and Chen, Yixin and Wang, Ziwei and Tang, Yansong and Huang, Siyuan}, title = {GWM: Towards Scalable Gaussian World Models for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9263-9274} }
Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2025_ICCV, author = {Yeo, Qi Xun and Li, Yanyan and Lee, Gim Hee}, title = {Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24999-25008} }
Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk}, title = {Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17171-17180} }
CopyrightShield: Enhancing Diffusion Model Security Against Copyright Infringement Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Zhixiang and Liang, Siyuan and Liu, Aishan and Tao, Dacheng}, title = {CopyrightShield: Enhancing Diffusion Model Security Against Copyright Infringement Attacks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19417-19426} }
PixTalk: Controlling Photorealistic Image Processing and Editing with Language-
[pdf]
[supp]
[bibtex]@InProceedings{Conde_2025_ICCV, author = {Conde, Marcos V. and Lu, Zihao and Timofte, Radu}, title = {PixTalk: Controlling Photorealistic Image Processing and Editing with Language}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19269-19279} }
Learning Streaming Video Representation via Multitask Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Yibin and Xu, Jilan and Di, Shangzhe and Liu, Yikun and Shi, Yudi and Chen, Qirui and Li, Zeqian and Huang, Yifei and Xie, Weidi}, title = {Learning Streaming Video Representation via Multitask Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9900-9912} }
DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jie and Hu, Zhangchi and Wu, Peixi and Zhu, Huyue and Li, Hebei and Sun, Xiaoyan}, title = {DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26349-26359} }
Pose-Star: Anatomy-Aware Editing for Open-World Fashion Images-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Yuran and Ye, Mang}, title = {Pose-Star: Anatomy-Aware Editing for Open-World Fashion Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15822-15831} }
MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yaoye and Wang, Zhe and Wang, Yan}, title = {MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26696-26705} }
Beyond the Destination: A Novel Benchmark for Exploration-Aware Embodied Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Kaixuan and Liu, Yang and Chen, Weixing and Luo, Jingzhou and Chen, Ziliang and Pan, Ling and Li, Guanbin and Lin, Liang}, title = {Beyond the Destination: A Novel Benchmark for Exploration-Aware Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9091-9101} }
Semantic Equitable Clustering: A Simple and Effective Strategy for Clustering Vision Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Qihang and Huang, Huaibo and Chen, Mingrui and He, Ran}, title = {Semantic Equitable Clustering: A Simple and Effective Strategy for Clustering Vision Tokens}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4019-4028} }
HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Yingqi and Xu, Zhuoran and Meng, Zhaotie and Cheng, Erkang}, title = {HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25605-25615} }
Visual Textualization for Image Prompted Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yongjian and Zhou, Yang and Saiyin, Jiya and Wei, Bingzheng and Xu, Yan}, title = {Visual Textualization for Image Prompted Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20900-20910} }
Sliced Wasserstein Bridge for Open-Vocabulary Video Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Zheyun and Yu, Deng and Luo, Chuanchen and Chen, Zhumin}, title = {Sliced Wasserstein Bridge for Open-Vocabulary Video Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12470-12478} }
Moderating the Generalization of Score-based Generative Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Wan and Wang, He and Zhang, Xin and Guo, Dan and Fan, Zhaoxin and Diao, Yunfeng and Hong, Richang}, title = {Moderating the Generalization of Score-based Generative Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {360-369} }
A Token-level Text Image Foundation Model for Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Tongkun and Wang, Zining and Fu, Pei and Guo, Zhengtao and Shen, Wei and Zhou, Kai and Yue, Tiezhu and Duan, Chen and Sun, Hao and Jiang, Qianyi and Luo, Junfeng and Yang, Xiaokang}, title = {A Token-level Text Image Foundation Model for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23210-23220} }
Riemannian-Geometric Fingerprints of Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Hae Jin and Itti, Laurent}, title = {Riemannian-Geometric Fingerprints of Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11425-11435} }
NeRF Is a Valuable Assistant for 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Shuangkang and Shen, I-Chao and Igarashi, Takeo and Wang, Yufeng and Wang, ZeSheng and Yang, Yi and Ding, Wenrui and Zhou, Shuchang}, title = {NeRF Is a Valuable Assistant for 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26230-26240} }
Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Jaenal_2025_ICCV, author = {Jaenal, Alberto and Cubero, Paula Carb\'o and Ara\'ujo, Jos\'e and Mateus, Andr\'e}, title = {Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26783-26792} }
EasyControl: Adding Efficient and Flexible Control for Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yuxuan and Yuan, Yirui and Song, Yiren and Wang, Haofan and Liu, Jiaming}, title = {EasyControl: Adding Efficient and Flexible Control for Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19513-19524} }
PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jinhua and Sheng, Hualian and Cai, Sijia and Deng, Bing and Liang, Qiao and Li, Wen and Fu, Ying and Ye, Jieping and Gu, Shuhang}, title = {PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26306-26315} }
DocThinker: Explainable Multimodal Large Language Models with Rule-based Reinforcement Learning for Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Wenwen and Yang, Zhibo and Liu, Yuliang and Bai, Xiang}, title = {DocThinker: Explainable Multimodal Large Language Models with Rule-based Reinforcement Learning for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {837-847} }
I2VControl: Disentangled and Unified Video Motion Synthesis Control-
[pdf]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Wanquan and Qi, Tianhao and Liu, Jiawei and Sun, Mingzhen and Tu, Pengqi and Ma, Tianxiang and Dai, Fei and Zhao, Songtao and Zhou, Siyu and He, Qian}, title = {I2VControl: Disentangled and Unified Video Motion Synthesis Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14051-14060} }
Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Dongwon and He, Ju and Yu, Qihang and Yang, Chenglin and Shen, Xiaohui and Kwak, Suha and Chen, Liang-Chieh}, title = {Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18442-18452} }
Aligning Global Semantics and Local Textures in Generative Video Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao}, title = {Aligning Global Semantics and Local Textures in Generative Video Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17087-17096} }
VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Meiqi and Qiu, Han}, title = {VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23300-23310} }
Reverse Convolution and Its Applications to Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Xuhong and Liu, Shiqi and Zhang, Kai and Tai, Ying and Yang, Jian and Zeng, Hui and Zhang, Lei}, title = {Reverse Convolution and Its Applications to Image Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10507-10516} }
ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Xiaokun and Hu, Shiyu and Li, Xuchen and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Chen, Xiaotang and Huang, Kaiqi}, title = {ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19850-19861} }
Reminiscence Attack on Residuals: Exploiting Approximate Machine Unlearning for Privacy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Yaxin and Ye, Qingqing and Hu, Li and Zheng, Huadi and Hu, Haibo and Liang, Zi and Li, Haoyang and Jiao, Yijie}, title = {Reminiscence Attack on Residuals: Exploiting Approximate Machine Unlearning for Privacy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3058-3068} }
SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Zhewei and Zeng, Shilei and Liu, Haotian and Li, Xurui and Xue, Feng and Zhou, Yu}, title = {SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23135-23144} }
CreatiLayout: Siamese Multimodal Diffusion Transformer for Creative Layout-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hui and Hong, Dexiang and Wang, Yitong and Shao, Jie and Wu, Xinglong and Wu, Zuxuan and Jiang, Yu-Gang}, title = {CreatiLayout: Siamese Multimodal Diffusion Transformer for Creative Layout-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18487-18497} }
AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2025_ICCV, author = {Rao, Bin and Liao, Haicheng and Guan, Yanchen and Wang, Chengyue and Wang, Bonan and Zhang, Jiaxun and Li, Zhenning}, title = {AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28849-28858} }
Music Grounding by Short Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xin_2025_ICCV, author = {Xin, Zijie and Wang, Minquan and Liu, Jingyu and Chen, Quan and Ma, Ye and Jiang, Peng and Li, Xirong}, title = {Music Grounding by Short Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22285-22293} }
DyWA: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_ICCV, author = {Lyu, Jiangran and Li, Ziming and Shi, Xuesong and Xu, Chaoyi and Wang, Yizhou and Wang, He}, title = {DyWA: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11058-11068} }
POMATO: Marrying Pointmap Matching with Temporal Motions for Dynamic 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Songyan and Ge, Yongtao and Tian, Jinyuan and Xu, Guangkai and Chen, Hao and Lv, Chen and Shen, Chunhua}, title = {POMATO: Marrying Pointmap Matching with Temporal Motions for Dynamic 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5680-5689} }
Flow Stochastic Segmentation Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{De_Sousa_Ribeiro_2025_ICCV, author = {De Sousa Ribeiro, Fabio and Todd, Omar and Jones, Charles and Kori, Avinash and Mehta, Raghav and Glocker, Ben}, title = {Flow Stochastic Segmentation Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14754-14765} }
Learning Large Motion Estimation from Intermediate Representations with a High-Resolution Optical Flow Dataset Featuring Long-Range Dynamic Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin}, title = {Learning Large Motion Estimation from Intermediate Representations with a High-Resolution Optical Flow Dataset Featuring Long-Range Dynamic Motion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6176-6187} }
Towards Higher Effective Rank in Parameter-Efficient Fine-tuning using Khatri-Rao Product-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Albert_2025_ICCV, author = {Albert, Paul and Zhang, Frederic Z. and Saratchandran, Hemanth and van den Hengel, Anton and Abbasnejad, Ehsan}, title = {Towards Higher Effective Rank in Parameter-Efficient Fine-tuning using Khatri-Rao Product}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1292-1302} }
USP: Unified Self-Supervised Pretraining for Image Generation and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2025_ICCV, author = {Chu, Xiangxiang and Li, Renda and Wang, Yong}, title = {USP: Unified Self-Supervised Pretraining for Image Generation and Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18475-18486} }
Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Ke and Lu, Shunlin and Dai, Minyue and Yu, Runyi and Xiao, Lixing and Dou, Zhiyang and Dong, Junting and Ma, Lizhuang and Wang, Jingbo}, title = {Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13336-13348} }
Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on fMRI-
[pdf]
[bibtex]@InProceedings{Jing_2025_ICCV, author = {Jing, Haodong and Jiang, Dongyao and Ma, Yongqiang and Hua, Haibo and Huang, Bo and Zheng, Nanning}, title = {Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on fMRI}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19258-19268} }
DAViD: Modeling Dynamic Affordance of 3D Objects Using Pre-trained Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Hyeonwoo and Baik, Sangwon and Joo, Hanbyul}, title = {DAViD: Modeling Dynamic Affordance of 3D Objects Using Pre-trained Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10330-10341} }
SKALD: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Chen-Yi and Tanjim, Md Mehrab and Dasgupta, Ishita and Sarkhel, Somdeb and Wu, Gang and Mitra, Saayan and Chaterji, Somali}, title = {SKALD: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17859-17868} }
When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Pan and Liu, Jinshi}, title = {When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21874-21884} }
Preacher: Paper-to-Video Agentic System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jingwei and Yang, Ling and Luo, Hao and Wang, Fan and Li, Hongyan and Wang, Mengdi}, title = {Preacher: Paper-to-Video Agentic System}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17129-17139} }
More Reliable Pseudo-labels, Better Performance: A Generalized Approach to Single Positive Multi-label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_ICCV, author = {Tran, Luong and Vo, Thieu and Nguyen, Anh and Dinh, Sang and Nguyen, Van}, title = {More Reliable Pseudo-labels, Better Performance: A Generalized Approach to Single Positive Multi-label Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1349-1358} }
Detection, Pose Estimation and Segmentation for Multiple Bodies: Closing the Virtuous Circle-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Purkrabek_2025_ICCV, author = {Purkrabek, Miroslav and Matas, Jiri}, title = {Detection, Pose Estimation and Segmentation for Multiple Bodies: Closing the Virtuous Circle}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9004-9013} }
CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Yuxuan and Tang, Jiaqi and Huang, Chenyi and Hao, Feiyang and Lian, Zhouhui}, title = {CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23030-23040} }
NeuralSVG: An Implicit Representation for Text-to-Vector Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Polaczek_2025_ICCV, author = {Polaczek, Sagi and Alaluf, Yuval and Richardson, Elad and Vinker, Yael and Cohen-Or, Daniel}, title = {NeuralSVG: An Implicit Representation for Text-to-Vector Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15458-15468} }
Hierarchical Cross-modal Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Hao and Yang, Shunzhi and He, Zhuoxin and Yang, Jinfeng and Huang, Zhenhua}, title = {Hierarchical Cross-modal Prompt Learning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1891-1901} }
RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Kunzel_2025_ICCV, author = {K\"unzel, Johannes and Hilsmann, Anna and Eisert, Peter}, title = {RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4868-4877} }
Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Ziyi and Zhou, Chengxing and Deng, Weijian and Wei, Pengxu and Ji, Xiangyang and Lin, Liang}, title = {Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17401-17410} }
Activation Subspaces for Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zongur_2025_ICCV, author = {Z\"ong\"ur, Bar{\i}\c{s} and Hesse, Robin and Roth, Stefan}, title = {Activation Subspaces for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3509-3519} }
SIMS: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Wenjia and Pan, Liang and Dou, Zhiyang and Mei, Jidong and Liao, Zhouyingcheng and Lou, Yuke and Wu, Yifan and Yang, Lei and Wang, Jingbo and Komura, Taku}, title = {SIMS: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14117-14127} }
LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Girella_2025_ICCV, author = {Girella, Federico and Talon, Davide and Liu, Ziyue and Ruan, Zanxi and Wang, Yiming and Cristani, Marco}, title = {LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19711-19720} }
DuoCLR: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Haitao}, title = {DuoCLR: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13772-13782} }
Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chi_2025_ICCV, author = {Chi, Zhixiang and Wu, Yanan and Gu, Li and Liu, Huan and Wang, Ziqiang and Zhang, Yang and Wang, Yang and Plataniotis, Konstantinos}, title = {Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22815-22825} }
Street Gaussians without 3D Object Tracker-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ruida and Li, Chengxi and Zhang, Chenyangguang and Liu, Xingyu and Yuan, Haili and Li, Yanyan and Ji, Xiangyang and Lee, Gim Hee}, title = {Street Gaussians without 3D Object Tracker}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25722-25734} }
AllGCD: Leveraging All Unlabeled Data for Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Xinzi and Chen, Ke and Yang, Feidiao and Zheng, Xiawu and Tian, Yonghong and Lu, Yutong}, title = {AllGCD: Leveraging All Unlabeled Data for Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3293-3303} }
Principles of Visual Tokens for Efficient Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_ICCV, author = {Hao, Xinyue and Li, Gen and Gowda, Shreyank N and Fisher, Robert B. and Huang, Jonathan and Arnab, Anurag and Sevilla-Lara, Laura}, title = {Principles of Visual Tokens for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21254-21264} }
Boosting Domain Generalized and Adaptive Detection with Diffusion Models: Fitness, Generalization, and Transferability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Boyong and Ji, Yuxiang and Tan, Zhuoyue and Wu, Liaoni}, title = {Boosting Domain Generalized and Adaptive Detection with Diffusion Models: Fitness, Generalization, and Transferability}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {1912--1923} }
Power of Cooperative Supervision: Multiple Teachers Framework for Advanced 3D Semi-Supervised Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jin-Hee and Lee, Jae-Keun and Kim, Jeseok and Soon, Kwon}, title = {Power of Cooperative Supervision: Multiple Teachers Framework for Advanced {3D} Semi-Supervised Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {6994--7003} }
SAM Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Yi and Wang, Rui and Huang, Tao and Xiao, Tong and Jing, Liping}, title = {{SAM} Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10624--10634} }
ReassembleNet: Learnable Keypoints and Diffusion for 2D Fresco Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2025_ICCV, author = {Islam, Adeela and Fiorini, Stefano and James, Stuart and Morerio, Pietro and Del Bue, Alessio}, title = {{ReassembleNet}: Learnable Keypoints and Diffusion for {2D} Fresco Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {9048--9057} }
Similarity Memory Prior is All You Need for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Hao and Guo, Zhiqing and Wang, Liejun and Liu, Chao}, title = {Similarity Memory Prior is All You Need for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23009--23018} }
MMGeo: Multimodal Compositional Geo-Localization for UAVs-
[pdf]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Yuxiang and He, Boyong and Tan, Zhuoyue and Wu, Liaoni}, title = {{MMGeo}: Multimodal Compositional Geo-Localization for {UAVs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25165--25175} }
Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yichen and Nie, Siwei and Lu, Minlong and Yang, Xudong and Zhang, Xiaobo and Zhang, Peng}, title = {Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19248--19257} }
InstructSeg: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Zeng, Yingsen and Liu, Yong and Wang, Hongfa and Yang, Yujiu}, title = {{InstructSeg}: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20193--20203} }
Multispectral Demosaicing via Dual Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tedla_2025_ICCV, author = {Tedla, SaiKiran and Lee, Junyong and Yang, Beixuan and Afifi, Mahmoud and Brown, Michael S.}, title = {Multispectral Demosaicing via Dual Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {5405--5414} }
Reflect-DiT: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Koneru, Arsh and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya}, title = {{Reflect-DiT}: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15657--15668} }
HAMoBE: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person ReID-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Yiyang and Shi, Yunping and Liu, Feng and Liu, Xiaoming}, title = {{HAMoBE}: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person {ReID}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11525--11536} }
TopicGeo: An Efficient Unified Framework for Geolocation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xin and Wang, Xinlin and Gou, Shuiping}, title = {{TopicGeo}: An Efficient Unified Framework for Geolocation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8241--8251} }
Neighboring Autoregressive Modeling for Efficient Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Yefei and He, Yuanyu and He, Shaoxuan and Chen, Feng and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan}, title = {Neighboring Autoregressive Modeling for Efficient Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19000--19010} }
Beyond Simple Edits: Composed Video Retrieval with Dense Modifications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thawakar_2025_ICCV, author = {Thawakar, Omkar and Demidov, Dmitry and Thawkar, Ritesh and Anwer, Rao Muhammad and Shah, Mubarak and Khan, Fahad Shahbaz and Khan, Salman}, title = {Beyond Simple Edits: Composed Video Retrieval with Dense Modifications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20435--20444} }
MagicColor: Multi-Instance Sketch Colorization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yinhan and Ma, Yue and Wang, Bingyuan and Chen, Qifeng and Wang, Zeyu}, title = {{MagicColor}: Multi-Instance Sketch Colorization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15205--15217} }
Active Learning Meets Foundation Models: Fast Remote Sensing Data Annotation for Object Detection-
[pdf]
[bibtex]@InProceedings{Burges_2025_ICCV, author = {Burges, Marvin and Dias, Philipe Ambrozio and Woody, Carson and Walters, Sarah and Lunga, Dalton}, title = {Active Learning Meets Foundation Models: Fast Remote Sensing Data Annotation for Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {6058--6068} }
Im2Haircut: Single-view Strand-based Hair Reconstruction for Human Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sklyarova_2025_ICCV, author = {Sklyarova, Vanessa and Zakharov, Egor and Prinzler, Malte and Becherini, Giorgio and Black, Michael J. and Thies, Justus}, title = {{Im2Haircut}: Single-view Strand-based Hair Reconstruction for Human Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10656--10665} }
Leveraging Prior Knowledge of Diffusion Model for Person Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Giyeol and Yang, Sooyoung and Oh, Jihyong and Kang, Myungjoo and Eom, Chanho}, title = {Leveraging Prior Knowledge of Diffusion Model for Person Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20301--20312} }
PS3: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Raza_2025_ICCV, author = {Raza, Manahil and Azam, Ayesha and Qaiser, Talha and Rajpoot, Nasir}, title = {{PS3}: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22175--22186} }
Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xingjian and Chai, Li and Chen, Jiming}, title = {Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22989--22998} }
EDM: Efficient Deep Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xi and Rao, Tong and Pan, Cihui}, title = {{EDM}: Efficient Deep Feature Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26198--26208} }
Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahapatra_2025_ICCV, author = {Mahapatra, Aniruddha and Mai, Long and Bourgin, David and Zhang, Yitian and Liu, Feng}, title = {Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17629--17639} }
Stronger, Steadier & Superior: Geometric Consistency in Depth VFM Forges Domain Generalized Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Siyu and Han, Ting and Zhang, Changshe and Luo, Xin and Wu, Meiliu and Cai, Guorong and Su, Jinhe}, title = {Stronger, Steadier \& Superior: Geometric Consistency in Depth {VFM} Forges Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8285--8295} }
CorrCLIP: Reconstructing Patch Correlations in CLIP for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Dengke and Liu, Fagui and Tang, Quan}, title = {{CorrCLIP}: Reconstructing Patch Correlations in {CLIP} for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24677--24687} }
LBM: Latent Bridge Matching for Fast Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chadebec_2025_ICCV, author = {Chadebec, Cl{\'e}ment and Tasar, Onur and Sreetharan, Sanjeev and Aubin, Benjamin}, title = {{LBM}: Latent Bridge Matching for Fast Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {29086--29098} }
DreamRenderer: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Dewei and Li, Mingwei and Yang, Zongxin and Yang, Yi}, title = {{DreamRenderer}: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16712--16722} }
Towards a 3D Transfer-based Black-box Attack via Critical Feature Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_ICCV, author = {Pang, Shuchao and Chen, Zhenghan and Zhang, Shen and Lu, Liming and Liang, Siyuan and Du, Anan and Zhou, Yongbin}, title = {Towards a {3D} Transfer-based Black-box Attack via Critical Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26912--26922} }
TurboVSR: Fantastic Video Upscalers and Where to Find Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zhongdao and Zhao, Guodongfang and Ren, Jingjing and Feng, Bailan and Zhang, Shifeng and Li, Wenbo}, title = {{TurboVSR}: Fantastic Video Upscalers and Where to Find Them}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18132--18142} }
LLM-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Wei and Xu, Chunyan and Wang, Chenxu and Cui, Zhen}, title = {{LLM}-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22519--22528} }
NuPlanQA: A Large-Scale Dataset and Benchmark for Multi-View Driving Scene Understanding in Multi-Modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Sung-Yeon and Cui, Can and Ma, Yunsheng and Moradipari, Ahmadreza and Gupta, Rohit and Han, Kyungtae and Wang, Ziran}, title = {{NuPlanQA}: A Large-Scale Dataset and Benchmark for Multi-View Driving Scene Understanding in Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {8066--8076} }
Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories-
[pdf]
[bibtex]@InProceedings{Xiu_2025_ICCV, author = {Xiu, Jingqiao and Li, Yicong and Zhao, Na and Fang, Han and Wang, Xiang and Yao, Angela}, title = {Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27435--27444} }
ODDR: Outlier Detection & Dimension Reduction Based Defense Against Adversarial Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chattopadhyay_2025_ICCV, author = {Chattopadhyay, Nandish and Guesmi, Amira and Hanif, Muhammad Abdullah and Ouni, Bassem and Shafique, Muhammad}, title = {{ODDR}: Outlier Detection \& Dimension Reduction Based Defense Against Adversarial Patches}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22999--23008} }
Mind the Gap: Preserving and Compensating for the Modality Gap in CLIP-Based Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Linlan and Cao, Xusheng and Lu, Haori and Meng, Yifan and Yang, Fei and Liu, Xialei}, title = {Mind the Gap: Preserving and Compensating for the Modality Gap in {CLIP}-Based Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {3777--3786} }
Stochastic Gradient Estimation for Higher-Order Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zican and Fischer, Michael and Ritschel, Tobias}, title = {Stochastic Gradient Estimation for Higher-Order Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28198--28206} }
PolGS: Polarimetric Gaussian Splatting for Fast Reflective Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Yufei and Tie, Bowen and Guo, Heng and Lyu, Youwei and Li, Si and Shi, Boxin and Jia, Yunpeng and Ma, Zhanyu}, title = {{PolGS}: Polarimetric {Gaussian} Splatting for Fast Reflective Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28073--28082} }
AdvDreamer Unveils: Are Vision-Language Models Truly Ready for Real-World 3D Variations?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruan_2025_ICCV, author = {Ruan, Shouwei and Liu, Hanqing and Huang, Yao and Wang, Xiaoqi and Kang, Caixin and Su, Hang and Dong, Yinpeng and Wei, Xingxing}, title = {{AdvDreamer} Unveils: Are Vision-Language Models Truly Ready for Real-World {3D} Variations?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7894--7904} }
ERNet: Efficient Non-Rigid Registration Network for Point Sequences-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Guangzhao and Xiao, Yuxi and Xu, Zhen and Zhou, Xiaowei and Peng, Sida}, title = {{ERNet}: Efficient Non-Rigid Registration Network for Point Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27156--27165} }
Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Tianyuan and Tang, Longxiang and Peng, Bohao and Yang, Senqiao and Yu, Bei and Jia, Jiaya}, title = {Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20889--20899} }
Not all Views are Created Equal: Analyzing Viewpoint Instabilities in Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Michalkiewicz_2025_ICCV, author = {Michalkiewicz, Mateusz and Bai, Sheena and Baktashmotlagh, Mahsa and Jampani, Varun and Balakrishnan, Guha}, title = {Not all Views are Created Equal: Analyzing Viewpoint Instabilities in Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {9113--9123} }
HumorDB: Can AI understand graphical humor?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2025_ICCV, author = {Jain, Vedaant V and Kreiman, Gabriel and dos Santos Alves Feitosa, Felipe}, title = {{HumorDB}: Can {AI} understand graphical humor?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {604--613} }
Towards Safer and Understandable Driver Intention Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karuppasamy_2025_ICCV, author = {Karuppasamy, Mukilan and Gangisetty, Shankar and Rai, Shyam Nandan and Masone, Carlo and Jawahar, C V}, title = {Towards Safer and Understandable Driver Intention Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25378--25387} }
Generative Gaussian Splatting: Generating 3D Scenes with Video Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Schwarz_2025_ICCV, author = {Schwarz, Katja and M{\"u}ller, Norman and Kontschieder, Peter}, title = {Generative {Gaussian} Splatting: Generating {3D} Scenes with Video Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27510--27520} }
AffordDexGrasp: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Yi-Lin and Lin, Mu and Lin, Yuhao and Jiang, Jian-Jian and Wu, Xiao-Ming and Zeng, Ling-An and Zheng, Wei-Shi}, title = {{AffordDexGrasp}: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11818--11828} }
AR-1-to-3: Single Image to Consistent 3D Object via Next-View Prediction-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xuying and Zhou, Yupeng and Wang, Kai and Wang, Yikai and Li, Zhen and Jiao, Shaohui and Zhou, Daquan and Hou, Qibin and Cheng, Ming-Ming}, title = {{AR-1-to-3}: Single Image to Consistent {3D} Object via Next-View Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26273--26283} }
Inverse 3D Microscopy Rendering for Cell Shape Inference with Active Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ichbiah_2025_ICCV, author = {Ichbiah, Sacha and Sinha, Anshuman and Delbary, Fabrice and Turlier, Herv{\'e}}, title = {Inverse {3D} Microscopy Rendering for Cell Shape Inference with Active Mesh}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26987--26998} }
NeurOp-Diff: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion-
[pdf]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zihao and Tang, Yuzhi and Xu, Bowen and Li, Qingquan}, title = {{NeurOp-Diff}: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12491--12501} }
DC-TTA: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Jeong, Wooseong and Yoon, Kuk-Jin}, title = {{DC-TTA}: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23279--23289} }
PROL : Rehearsal Free Continual Learning in Streaming Data via Prompt Online Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma'sum_2025_ICCV, author = {Ma'sum, M. Anwar and Pratama, Mahardhika and Ramasamy, Savitha and Liu, Lin and Habibullah, Habibullah and Kowalczyk, Ryszard}, title = {{PROL} : Rehearsal Free Continual Learning in Streaming Data via Prompt Online Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {2471--2481} }
Generalization-Preserved Learning: Closing the Backdoor to Catastrophic Forgetting in Continual Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xueyi and Zhu, Peiyin and Zhang, Chengwei and Yan, Zhiyuan and Cheng, Jikang and Lao, Mingrui and Cai, Siqi and Guo, Yanming}, title = {Generalization-Preserved Learning: Closing the Backdoor to Catastrophic Forgetting in Continual Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {3798--3808} }
JailbreakDiffBench: A Comprehensive Benchmark for Jailbreaking Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Xiaolong and Weng, Zixuan and Guo, Hanxi and Yin, Chenlong and Cheng, Siyuan and Shen, Guangyu and Zhang, Xiangyu}, title = {{JailbreakDiffBench}: A Comprehensive Benchmark for Jailbreaking Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16461--16471} }
InfiniteYou: Flexible Photo Recrafting While Preserving Your Identity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Liming and Yan, Qing and Jia, Yumin and Liu, Zichuan and Kang, Hao and Lu, Xin}, title = {{InfiniteYou}: Flexible Photo Recrafting While Preserving Your Identity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10898--10907} }
GeoMan: Temporally Consistent Human Geometry Estimation using Image-to-Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Gwanghyun and Li, Xueting and Yuan, Ye and Nagano, Koki and Li, Tianye and Kautz, Jan and Chun, Se Young and Iqbal, Umar}, title = {{GeoMan}: Temporally Consistent Human Geometry Estimation using Image-to-Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7451--7461} }
NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Zhixi and Ke, Fucai and Jahangard, Simindokht and de la Banda, Maria Garcia and Haffari, Reza and Stuckey, Peter J. and Rezatofighi, Hamid}, title = {{NAVER}: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24078--24089} }
Generative Adversarial Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Jun_2025_ICCV, author = {Jun, U-Chae and Ko, Jaeeun and Kang, Jiwoo}, title = {Generative Adversarial Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16786--16796} }
GaussianVideo: Efficient Video Representation via Hierarchical Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bond_2025_ICCV, author = {Bond, Andrew and Wang, Jui-Hsien and Mai, Long and Erdem, Erkut and Erdem, Aykut}, title = {{GaussianVideo}: Efficient Video Representation via Hierarchical {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7187--7196} }
MergeOcc: Bridge the Domain Gap between Different LiDARs for Robust Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zikun and Xu, Shaobing}, title = {{MergeOcc}: Bridge the Domain Gap between Different {LiDARs} for Robust Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26539--26548} }
AutoScape: Geometry-Consistent Long-Horizon Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jiacheng and Jiang, Ziyu and Liang, Mingfu and Zhuang, Bingbing and Su, Jong-Chyi and Garg, Sparsh and Wu, Ying and Chandraker, Manmohan}, title = {{AutoScape}: Geometry-Consistent Long-Horizon Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25700--25711} }
Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xudong and Huang, Zihao and Zhang, Yan and Shen, Yunhang and Li, Ke and Zheng, Xiawu and Cao, Liujuan and Ji, Rongrong}, title = {Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10442--10452} }
UIP2P: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Simsar_2025_ICCV, author = {Simsar, Enis and Tonioni, Alessio and Xian, Yongqin and Hofmann, Thomas and Tombari, Federico}, title = {{UIP2P}: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18895--18905} }
Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Taehoon and Choi, Jongwook and Jeong, Yonghyun and Noh, Haeun and Yoo, Jaejun and Baek, Seungryul and Choi, Jongwon}, title = {Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11198--11207} }
Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Wooseong and Cho, Jegyeong and Yoon, Youngho and Yoon, Kuk-Jin}, title = {Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24340--24350} }
FLOSS: Free Lunch in Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benigmim_2025_ICCV, author = {Benigmim, Yasser and Fahes, Mohammad and Vu, Tuan-Hung and Bursuc, Andrei and de Charette, Raoul}, title = {{FLOSS}: Free Lunch in Open-vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21471--21481} }
p-MoD: Building Mixture-of-Depths MLLMs via Progressive Ratio Decay-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jun and Meng, Desen and Zhang, Zhengming and Huang, Zhenpeng and Wu, Tao and Wang, Limin}, title = {{p-MoD}: Building Mixture-of-Depths {MLLMs} via Progressive Ratio Decay}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {3705--3715} }
SAME: Learning Generic Language-Guided Visual Navigation with State-Adaptive Mixture of Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Gengze and Hong, Yicong and Wang, Zun and Zhao, Chongyang and Bansal, Mohit and Wu, Qi}, title = {{SAME}: Learning Generic Language-Guided Visual Navigation with State-Adaptive Mixture of Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {7794--7807} }
ToF-Splatting: Dense SLAM using Sparse Time-of-Flight Depth and Multi-Frame Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Conti_2025_ICCV, author = {Conti, Andrea and Poggi, Matteo and Cambareri, Valerio and Oswald, Martin R. and Mattoccia, Stefano}, title = {{ToF-Splatting}: Dense {SLAM} using Sparse Time-of-Flight Depth and Multi-Frame Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28344--28353} }
PASD: A Pixel-Adaptive Swarm Dynamics Approach for Unsupervised Low-Light Image Enhancement-
[pdf]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Shuai and Qian, Yuhua and Li, Feijiang and Liu, Guoqing and Liang, Xinyan}, title = {{PASD}: A Pixel-Adaptive Swarm Dynamics Approach for Unsupervised Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {9070--9079} }
ConsNoTrainLoRA: Data-driven Weight Initialization of Low-rank Adapters using Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2025_ICCV, author = {Das, Debasmit and Park, Hyoungwoo and Hayat, Munawar and Choi, Seokeon and Yun, Sungrack and Porikli, Fatih}, title = {ConsNoTrainLoRA: Data-driven Weight Initialization of Low-rank Adapters using Constraints}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {498-507} }
Correspondence-Free Fast and Robust Spherical Point Pattern Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarker_2025_ICCV, author = {Sarker, Anik and Asbeck, Alan T.}, title = {Correspondence-Free Fast and Robust Spherical Point Pattern Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28156-28166} }
Adversarial Robustness of Discriminative Self-Supervised Learning in Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Cagatan_2025_ICCV, author = {{\c{C}}a{\u{g}}atan, {\"O}mer Veysel and Tal, {\"O}mer Faruk and Gursoy, M. Emre}, title = {Adversarial Robustness of Discriminative Self-Supervised Learning in Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2313-2324} }
CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Pham_2025_ICCV, author = {Pham, Trong Thang and Awasthi, Akash and Khan, Saba and Marti, Esteban Duran and Nguyen, Tien-Phat and Vo, Khoa and Tran, Minh and Nguyen, Son and Tran, Cuong and Ikebe, Yuki and Nguyen, Anh Totti and Nguyen, Anh and Deng, Zhigang and Wu, Carol C. and Nguyen, Hien and Le, Ngan}, title = {CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21732-21743} }
MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Segu_2025_ICCV, author = {Segu, Mattia and Gazulla, Marta Tintore and Xian, Yongqin and Van Gool, Luc and Tombari, Federico}, title = {MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20726-20736} }
VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Jian and Wonka, Peter}, title = {VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28282-28291} }
TemCoCo: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_ICCV, author = {Gong, Meiqi and Zhang, Hao and Yi, Xunpeng and Tang, Linfeng and Ma, Jiayi}, title = {TemCoCo: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14326-14335} }
Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Jinsol and Wang, Jiamu and Nguyen, Anh Tien and Byeon, Keunho and Ahn, Sangjeong and Lee, Sung Hak and Kwak, Jin Tae}, title = {Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22066-22076} }
COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Lingyu and Zeng, Yawen and Wang, Yue and Wan, Peng and Ning, Guochen and Liao, Hongen and Zhang, Daoqiang and Chen, Fang}, title = {COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21460-21470} }
Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chung_2025_ICCV, author = {Chung, Jiwoo and Hyun, Sangeek and Kim, Hyunjun and Koh, Eunseo and Lee, MinKyu and Heo, Jae-Pil}, title = {Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19174-19184} }
Voyaging into Perpetual Dynamic Scenes from a Single View-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Fengrui and Ding, Tianjiao and Luo, Jinqi and Min, Hancheng and Vidal, Rene}, title = {Voyaging into Perpetual Dynamic Scenes from a Single View}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7698-7708} }
Accelerating Diffusion Transformer via Gradient-Optimized Cache-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Junxiang and Liu, Lin and Wang, Shuo and Lu, Jinda and Chen, Kezhou and Hao, Yanbin}, title = {Accelerating Diffusion Transformer via Gradient-Optimized Cache}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17608-17617} }
SceneSplat: Gaussian Splatting-based Scene Understanding with Vision-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yue and Ma, Qi and Yang, Runyi and Li, Huapeng and Ma, Mengjiao and Ren, Bin and Popovic, Nikola and Sebe, Nicu and Konukoglu, Ender and Gevers, Theo and Van Gool, Luc and Oswald, Martin R. and Paudel, Danda Pani}, title = {SceneSplat: Gaussian Splatting-based Scene Understanding with Vision-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4961-4972} }
Beyond the Limits: Overcoming Negative Correlation of Activation-Based Training-Free NAS-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Haidong and Ma, Lianbo and Chen, Pengjun and Yu, Guo and Wang, Xingwei and Huang, Min}, title = {Beyond the Limits: Overcoming Negative Correlation of Activation-Based Training-Free NAS}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {796-805} }
Unknown Text Learning for CLIP-based Few-Shot Open-set Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Rui and Wang, Qilong and Cao, Bing and Hu, Qinghua and Han, Yahong}, title = {Unknown Text Learning for CLIP-based Few-Shot Open-set Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {657-667} }
VoteSplat: Hough Voting Gaussian Splatting for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Minchao and Jia, Shunyu and Gu, Jiaming and Lu, Xiaoyuan and Zhu, Guangming and Dong, Anqi and Zhang, Liang}, title = {VoteSplat: Hough Voting Gaussian Splatting for 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6456-6465} }
C4D: 4D Made from 3D through Dual Correspondences-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Shizun and Jiang, Zhenxiang and Yang, Xingyi and Wang, Xinchao}, title = {C4D: 4D Made from 3D through Dual Correspondences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7570-7580} }
An Efficient Hybrid Vision Transformer for TinyML Applications-
[pdf]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Fanhong and Li, Huanan and Guan, Juntao and Fan, Rui and Wu, Tong and Wang, Xilong and Lai, Rui}, title = {An Efficient Hybrid Vision Transformer for TinyML Applications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19914-19924} }
Object-centric Video Question Answering with Visual Grounding and Referring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haochen and Chen, Qirui and Yan, Cilin and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi and Gavves, Stratis}, title = {Object-centric Video Question Answering with Visual Grounding and Referring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22274-22284} }
Face Retouching with Diffusion Data Generation and Spectral Restorement-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zhidan and Zhang, Xiaoqin and Lu, Shijian}, title = {Face Retouching with Diffusion Data Generation and Spectral Restorement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14722-14731} }
AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zhaonan and Li, Manyi and Tu, Changhe}, title = {AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26806-26816} }
Beyond Blur: A Fluid Perspective on Generative Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gruszczynski_2025_ICCV, author = {Gruszczynski, Grzegorz and Meixner, Jakub and Wlodarczyk, Michal and Musialski, Przemyslaw}, title = {Beyond Blur: A Fluid Perspective on Generative Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17818-17827} }
CanonSwap: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Xiangyang and Zhu, Ye and Liu, Yunfei and Lin, Lijian and Wan, Cong and Cai, Zijian and Li, Yu and Huang, Shao-Lun}, title = {CanonSwap: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10064-10074} }
Soft Local Completeness: Rethinking Completeness in XAI-
[pdf]
[supp]
[bibtex]@InProceedings{Haddad_2025_ICCV, author = {Haddad, Ziv Weiss and Barkan, Oren and Elisha, Yehonatan and Koenigstein, Noam}, title = {Soft Local Completeness: Rethinking Completeness in XAI}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19794-19804} }
Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Liang and Yang, Chengqun and Lin, Zili and Xu, Fei and Liu, Yifan and Xu, Congsheng and Zhang, Yiyi and Qin, Jie and Sheng, Xingdong and Liu, Yunhui and Jin, Xin and Yan, Yichao and Zeng, Wenjun and Yang, Xiaokang}, title = {Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12535-12548} }
Open-ended Hierarchical Streaming Video Understanding with Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Hyolim and Park, Yunsu and Yoo, Youngbeom and Choi, Yeeun and Kim, Seon Joo}, title = {Open-ended Hierarchical Streaming Video Understanding with Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20715-20725} }
SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bader_2025_ICCV, author = {Bader, Jessica and Girrbach, Leander and Alaniz, Stephan and Akata, Zeynep}, title = {SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23188-23198} }
G-DexGrasp: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jian_2025_ICCV, author = {Jian, Juntao and Liu, Xiuping and Chen, Zixuan and Li, Manyi and Liu, Jian and Hu, Ruizhen}, title = {G-DexGrasp: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11447-11457} }
Autoregressive Denoising Score Matching is a Good Video Anomaly Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hanwen and Cao, Congqi and Lv, Qinyi and Min, Lingtong and Zhang, Yanning}, title = {Autoregressive Denoising Score Matching is a Good Video Anomaly Detector}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12057-12067} }
LUDVIG: Learning-Free Uplifting of 2D Visual Features to Gaussian Splatting Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marrie_2025_ICCV, author = {Marrie, Juliette and Menegaux, Romain and Arbel, Michael and Larlus, Diane and Mairal, Julien}, title = {LUDVIG: Learning-Free Uplifting of 2D Visual Features to Gaussian Splatting Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7440-7450} }
PARTE: Part-Guided Texturing for 3D Human Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_ICCV, author = {Nam, Hyeongjin and Kim, Donghwan and Moon, Gyeongsik and Lee, Kyoung Mu}, title = {PARTE: Part-Guided Texturing for 3D Human Reconstruction from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8547-8557} }
LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lou_2025_ICCV, author = {Lou, Haoran and Fan, Chunxiao and Liu, Ziyan and Wu, Yuexin and Wang, Xinliang}, title = {LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22014-22024} }
3D-MOOD: Lifting 2D to 3D for Monocular Open-Set Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yung-Hsu and Piccinelli, Luigi and Segu, Mattia and Li, Siyuan and Huang, Rui and Fu, Yuqian and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria}, title = {3D-MOOD: Lifting 2D to 3D for Monocular Open-Set Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7429-7439} }
LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Mengxiao and Wu, Xinxiao and Yang, Shuo}, title = {LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20748-20757} }
PointGAC: Geometric-Aware Codebook for Masked Point Modeling-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Abiao and Lv, Chenlei and Fang, Yuming and Zuo, Yifan and Zhang, Jian and Mei, Guofeng}, title = {PointGAC: Geometric-Aware Codebook for Masked Point Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24989-24998} }
Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yunheng and Li, Yuxuan and Zeng, Quan-Sheng and Wang, Wenhai and Hou, Qibin and Cheng, Ming-Ming}, title = {Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23795-23805} }
CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Lin and Cao, Jiale and Xie, Jin and Jiang, Xiaoheng and Pang, Yanwei}, title = {CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23199-23209} }
Adversarial Reconstruction Feedback for Robust Fine-grained Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Shijie and Shi, Jian and Li, Haojie}, title = {Adversarial Reconstruction Feedback for Robust Fine-grained Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3080-3090} }
ConceptSplit: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2025_ICCV, author = {Lim, Habin and Won, Yeongseob and Seo, Juwon and Park, Gyeong-Moon}, title = {ConceptSplit: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18421-18430} }
Consistency Trajectory Matching for One-Step Generative Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2025_ICCV, author = {You, Weiyi and Zhang, Mingyang and Zhang, Leheng and Zhou, Xingyu and Shi, Kexuan and Gu, Shuhang}, title = {Consistency Trajectory Matching for One-Step Generative Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12747-12756} }
SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xiangzeng and Wang, Chi and Shi, Guanglu and Zhang, Xiaodong and Miao, Qiguang and Fan, Miao}, title = {SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27095-27104} }
SpectralAR: Spectral Autoregressive Visual Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yuanhui and Chen, Weiliang and Zheng, Wenzhao and Duan, Yueqi and Zhou, Jie and Lu, Jiwen}, title = {SpectralAR: Spectral Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15842-15852} }
RogSplat: Robust Gaussian Splatting via Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_ICCV, author = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao}, title = {RogSplat: Robust Gaussian Splatting via Generative Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25735-25745} }
When Schrödinger Bridge Meets Real-World Image Dehazing with Unpaired Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lan_2025_ICCV, author = {Lan, Yunwei and Cui, Zhigao and Luo, Xin and Liu, Chang and Wang, Nian and Zhang, Menglin and Su, Yanzhao and Liu, Dong}, title = {When Schr{\"o}dinger Bridge Meets Real-World Image Dehazing with Unpaired Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8756-8765} }
CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Ki_2025_ICCV, author = {Ki, Minjoo and Kim, Daejung and Kim, Kisung and Kim, Seon Joo and Lee, Jinhan}, title = {CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22036-22045} }
Punching Bag vs. Punching Person: Motion Transferability in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abdullah_2025_ICCV, author = {Abdullah, Raiyaan and Claypoole, Jared and Cogswell, Michael and Divakaran, Ajay and Rawat, Yogesh}, title = {Punching Bag vs. Punching Person: Motion Transferability in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11348-11358} }
CODE-CL: Conceptor-Based Gradient Projection for Deep Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Apolinario_2025_ICCV, author = {Apolinario, Marco P. E. and Choudhary, Sakshi and Roy, Kaushik}, title = {CODE-CL: Conceptor-Based Gradient Projection for Deep Continual Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {775-784} }
Boosting Generative Adversarial Transferability with Self-supervised Vision Transformer Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Shangbo and Tan, Yu-an and Ma, Ruinan and Ma, Wencong and Zhu, Dehua and Li, Yuanzhang}, title = {Boosting Generative Adversarial Transferability with Self-supervised Vision Transformer Features}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {530-540} }
Zero-Shot Compositional Video Learning with Coding Rate Reduction-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Heeseok and Bak, Jun-Hyeon and Jeong, Yujin and Lee, Gyugeun and Ahn, Jinwoo and Kim, Eun-Sol}, title = {Zero-Shot Compositional Video Learning with Coding Rate Reduction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20508-20518} }
AVTrustBench: Assessing and Enhancing Reliability and Robustness in Audio-Visual LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2025_ICCV, author = {Chowdhury, Sanjoy and Nag, Sayan and Dasgupta, Subhrajyoti and Wang, Yaoting and Elhoseiny, Mohamed and Gao, Ruohan and Manocha, Dinesh}, title = {AVTrustBench: Assessing and Enhancing Reliability and Robustness in Audio-Visual LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1590-1601} }
LOTA: Bit-Planes Guided AI-Generated Image Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hongsong and Cheng, Renxi and Zhang, Yang and Han, Chaolei and Gui, Jie}, title = {LOTA: Bit-Planes Guided AI-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17246-17255} }
Benchmarking Egocentric Visual-Inertial SLAM at City Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnan_2025_ICCV, author = {Krishnan, Anusha and Liu, Shaohui and Sarlin, Paul-Edouard and Gentilhomme, Oscar and Caruso, David and Monge, Maurizio and Newcombe, Richard and Engel, Jakob and Pollefeys, Marc}, title = {Benchmarking Egocentric Visual-Inertial SLAM at City Scale}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25207-25217} }
CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roh_2025_ICCV, author = {Roh, Wonseok and Jung, Hwanhee and Kim, Jong Wook and Lee, Seunggwan and Yoo, Innfarn and Lugmayr, Andreas and Chi, Seunggeun and Ramani, Karthik and Kim, Sangpil}, title = {CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28228-28238} }
Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_ICCV, author = {Nguyen, Viet and Nguyen, Anh and Dao, Trung and Nguyen, Khoi and Pham, Cuong and Tran, Toan and Tran, Anh}, title = {Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18004-18013} }
Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Qing and Feng, Huifang and Gong, Xun and Liu, Yu-Shen}, title = {Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28828-28838} }
CoTracker3: Simpler and Better Point Tracking by Pseudo-Labelling Real Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karaev_2025_ICCV, author = {Karaev, Nikita and Makarov, Yuri and Wang, Jianyuan and Neverova, Natalia and Vedaldi, Andrea and Rupprecht, Christian}, title = {CoTracker3: Simpler and Better Point Tracking by Pseudo-Labelling Real Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6013-6022} }
IMG: Calibrating Diffusion Models via Implicit Multimodal Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Jiayi and Yan, Chuanhao and Xu, Xingqian and Wang, Yulin and Wang, Kai and Huang, Gao and Shi, Humphrey}, title = {IMG: Calibrating Diffusion Models via Implicit Multimodal Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16079-16089} }
VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Jiashuo and Wu, Yue and Chu, Meng and Ren, Zhifei and Huang, Zizheng and Chu, Pei and Zhang, Ruijie and He, Yinan and Li, Qirui and Li, Songze and Li, Zhenxiang and Tu, Zhongying and He, Conghui and Qiao, Yu and Wang, Yali and Wang, Yi and Wang, Limin}, title = {VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21655-21666} }
PUMPS: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2025_ICCV, author = {Mo, Clinton Ansun and Hu, Kun and Long, Chengjiang and Yuan, Dong and Siu, Wan-Chi and Wang, Zhiyong}, title = {PUMPS: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14496-14506} }
When and Where do Data Poisons Attack Textual Inversion?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Styborski_2025_ICCV, author = {Styborski, Jeremy and Lyu, Mingzhi and Lu, Jiayou and Kapur, Nupur and Kong, Adams Wai-Kin}, title = {When and Where do Data Poisons Attack Textual Inversion?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19439-19449} }
RAGD: Regional-Aware Diffusion Model for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhennan and Li, Yajie and Wang, Haofan and Chen, Zhibo and Jiang, Zhengkai and Li, Jun and Wang, Qian and Yang, Jian and Tai, Ying}, title = {RAGD: Regional-Aware Diffusion Model for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19331-19341} }
OpenM3D: Open Vocabulary Multi-view Indoor 3D Object Detection without Human Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsu_2025_ICCV, author = {Hsu, Peng-Hao and Zhang, Ke and Wang, Fu-En and Tu, Tao and Li, Ming-Feng and Liu, Yu-Lun and Chen, Albert Y. C. and Sun, Min and Kuo, Cheng-Hao}, title = {OpenM3D: Open Vocabulary Multi-view Indoor 3D Object Detection without Human Annotations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8688-8698} }
From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lo_2025_ICCV, author = {Lo, Ling and Chan, Kelvin C. K. and Cheng, Wen-Huang and Yang, Ming-Hsuan}, title = {From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18651-18660} }
Benchmarking Multimodal CoT Reward Model Stepwise by Visual Program-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Minghe and Liu, Xuqi and Yue, Zhongqi and Wu, Yang and Chen, Shuang and Li, Juncheng and Tang, Siliang and Wu, Fei and Chua, Tat-Seng and Zhuang, Yueting}, title = {Benchmarking Multimodal CoT Reward Model Stepwise by Visual Program}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1718-1728} }
Web Artifact Attacks Disrupt Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qraitem_2025_ICCV, author = {Qraitem, Maan and Teterwak, Piotr and Saenko, Kate and Plummer, Bryan A.}, title = {Web Artifact Attacks Disrupt Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1048-1057} }
GlassWizard: Harvesting Diffusion Priors for Glass Surface Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Wenxue and Ye, Tian and Xiong, Xinyu and Bai, Jinbin and Tang, Feilong and Song, Wenxuan and Xing, Zhaohu and Ju, Lie and Li, Guanbin and Zhu, Lei}, title = {GlassWizard: Harvesting Diffusion Priors for Glass Surface Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17848-17858} }
Quanta Neural Networks: From Photons to Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Sundar_2025_ICCV, author = {Sundar, Varun and Zhang, Tianyi and Jungerman, Sacha and Gupta, Mohit}, title = {Quanta Neural Networks: From Photons to Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5091-5101} }
Cross-Subject Mind Decoding from Inaccurate Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Yangyang and Liu, Bangzhen and Shao, Wenqi and Du, Yong and He, Shengfeng and Zhu, Tingting}, title = {Cross-Subject Mind Decoding from Inaccurate Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15066-15075} }
How Can Objects Help Video-Language Understanding?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Zitian and Wang, Shijie and Cho, Junho and Yoo, Jaewook and Sun, Chen}, title = {How Can Objects Help Video-Language Understanding?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21994-22003} }
ImageGen-CoT: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Jiaqi and Yang, Zhengyuan and Li, Linjie and Li, Dianqi and Lin, Kevin and Cheng, Yu and Wang, Lijuan}, title = {ImageGen-CoT: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17214-17223} }
Auto-Regressive Transformation for Image Alignment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Kanggeon and Lee, Soochahn and Lee, Kyoung Mu}, title = {Auto-Regressive Transformation for Image Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13569-13579} }
Beyond RGB: Adaptive Parallel Processing for RAW Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gamrian_2025_ICCV, author = {Gamrian, Shani and Barel, Hila and Li, Feiran and Yoshimura, Masakazu and Iso, Daisuke}, title = {Beyond RGB: Adaptive Parallel Processing for RAW Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5547-5557} }
Stable Virtual Camera: Generative View Synthesis with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Jensen and Gao, Hang and Voleti, Vikram and Vasishta, Aaryaman and Yao, Chun-Han and Boss, Mark and Torr, Philip and Rupprecht, Christian and Jampani, Varun}, title = {Stable Virtual Camera: Generative View Synthesis with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12405-12414} }
Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ulmer_2025_ICCV, author = {Ulmer, Maximilian and Boerdijk, Wout and Triebel, Rudolph and Durner, Maximilian}, title = {Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24360-24369} }
Learning Counterfactually Decoupled Attention for Open-World Model Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Yu and Gong, Boyang and Kong, Fanye and Duan, Yueqi and Yu, Bingyao and Zheng, Wenzhao and Chen, Lei and Lu, Jiwen and Zhou, Jie}, title = {Learning Counterfactually Decoupled Attention for Open-World Model Attribution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {122-132} }
DepR: Depth Guided Single-view Scene Reconstruction with Instance-level Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Qingcheng and Zhang, Xiang and Xu, Haiyang and Chen, Zeyuan and Xie, Jianwen and Gao, Yuan and Tu, Zhuowen}, title = {DepR: Depth Guided Single-view Scene Reconstruction with Instance-level Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5722-5733} }
Edicho: Consistent Image Editing in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_ICCV, author = {Bai, Qingyan and Ouyang, Hao and Xu, Yinghao and Wang, Qiuyu and Yang, Ceyuan and Cheng, Ka Leong and Shen, Yujun and Chen, Qifeng}, title = {Edicho: Consistent Image Editing in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15277-15287} }
Stable Diffusion Models are Secretly Good at Visual In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oorloff_2025_ICCV, author = {Oorloff, Trevine and Sindagi, Vishwanath and Bandara, Wele Gedara Chaminda and Shafahi, Ali and Ghiasi, Amin and Prakash, Charan and Ardekani, Reza}, title = {Stable Diffusion Models are Secretly Good at Visual In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23604-23613} }
T2Bs: Text-to-Character Blendshapes via Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Jiahao and Wang, Chaoyang and Vasilkovsky, Michael and Shakhrai, Vladislav and Liu, Di and Zhuang, Peiye and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying and Davis, James and Wang, Jian}, title = {T2Bs: Text-to-Character Blendshapes via Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13625-13637} }
QuEST: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haoxuan and Shang, Yuzhang and Yuan, Zhihang and Wu, Junyi and Yan, Junchi and Yan, Yan}, title = {QuEST: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15542-15551} }
Task-Specific Zero-shot Quantization-Aware Training for Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Changhao and Chen, Xinrui and Wang, Ji and Zhao, Kang and Chen, Jianfei}, title = {Task-Specific Zero-shot Quantization-Aware Training for Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22868-22878} }
Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Junyu and Han, Tengda and Bain, Max and Nagrani, Arsha and Khandelwal, Eshika and Varol, G{\"u}l and Xie, Weidi and Zisserman, Andrew}, title = {Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16503-16513} }
Snakes and Ladders: Two Steps Up for VideoMamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Hui and Salah, Albert A. and Poppe, Ronald}, title = {Snakes and Ladders: Two Steps Up for VideoMamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24234-24244} }
Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhaoyang and Wang, Yuan and Xiong, Guoxin and Li, Wangkai and Pan, Yuwen and Zhang, Tianzhu}, title = {Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23063-23073} }
Extending Foundational Monocular Depth Estimators to Fisheye Cameras with Calibration Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gangopadhyay_2025_ICCV, author = {Gangopadhyay, Suchisrit and Kim, Jung-Hee and Chen, Xien and Rim, Patrick and Park, Hyoungseob and Wong, Alex}, title = {Extending Foundational Monocular Depth Estimators to Fisheye Cameras with Calibration Tokens}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5198-5209} }
IGD: Instructional Graphic Design with Multimodal Layer Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Yadong and Fang, Shancheng and Wang, Yuxin and Wang, Xiaorui and Chen, Zhineng and Xie, Hongtao and Zhang, Yongdong}, title = {IGD: Instructional Graphic Design with Multimodal Layer Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18218-18228} }
AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Tianyi and Zhang, Fan and Shi, Boxin and Xue, Tianfan and Wang, Yujin}, title = {AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25176-25185} }
UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Agrawal_2025_ICCV, author = {Agrawal, Harsh and Schoop, Eldon and Pan, Xinlei and Mahajan, Anuj and Seff, Ari and Feng, Di and Cheng, Ruijia and Teran, Andres Romero Mier Y and Gomez, Esteban and Sundararajan, Abhishek and Huang, Forrest and Swearngin, Amanda and Moorthy, Mohana Prasad Sathya and Nichols, Jeff and Toshev, Alexander}, title = {UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23353-23363} }
FedWSQ: Efficient Federated Learning with Weight Standardization and Distribution-Aware Non-Uniform Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Seung-Wook and Kim, Seongyeol and Kim, Jiah and Ji, Seowon and Lee, Se-Ho}, title = {FedWSQ: Efficient Federated Learning with Weight Standardization and Distribution-Aware Non-Uniform Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4616-4625} }
Knowledge Transfer from Interaction Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Yilin and Chen, Kangyi and Peng, Zhongxing and Lu, Hengjie and Xu, Shugong}, title = {Knowledge Transfer from Interaction Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3585-3595} }
Instruction-based Image Editing with Planning, Reasoning, and Generation-
[pdf]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Liya and Qi, Chenyang and Chen, Qifeng}, title = {Instruction-based Image Editing with Planning, Reasoning, and Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17506-17515} }
Enhancing Mamba Decoder with Bidirectional Interaction in Multi-Task Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Mang and Zhou, Sanping and Li, Yizhe and Deng, Ye and Huang, Wenli and Wang, Le}, title = {Enhancing Mamba Decoder with Bidirectional Interaction in Multi-Task Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18815-18824} }
CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Yuanyuan and Li, Hao and Chen, Jiaqi and Zou, Zhengyu and Zhong, Zhihang and Zhang, Dingwen and Sun, Xiao and Han, Junwei}, title = {CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27187-27196} }
Dataset Distillation via Vision-Language Category Prototype-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Yawen and Li, Guang and Su, Duo and Wang, Zi and Yu, Jun and Zhang, Chao}, title = {Dataset Distillation via Vision-Language Category Prototype}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2941-2950} }
Token Activation Map to Visually Explain Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yi and Wang, Hualiang and Ding, Xinpeng and Wang, Haonan and Li, Xiaomeng}, title = {Token Activation Map to Visually Explain Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {48-58} }
OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian}, title = {OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24933-24942} }
A Unified Framework to BRIDGE Complete and Incomplete Deep Multi-View Clustering under Non-IID Missing Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Xiaorui and He, Buyun and Zhou, Peng Yuan and Chen, Xinyue and Guo, Jingcai and Xu, Jie and Liao, Yong}, title = {A Unified Framework to BRIDGE Complete and Incomplete Deep Multi-View Clustering under Non-IID Missing Patterns}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {594-603} }
FairHuman: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuxuan and Cao, Tianwei and Zhang, Huayu and He, Zhongjiang and Liang, Kongming and Ma, Zhanyu}, title = {FairHuman: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17046-17055} }
CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhibo and Tang, Jun and Li, Zhaohai and Wang, Pengfei and Wan, Jianqiang and Zhong, Humen and Liu, Xuejing and Yang, Mingkun and Wang, Peng and Bai, Shuai and Jin, Lianwen and Lin, Junyang}, title = {CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21744-21754} }
PEFTDiff: Diffusion-Guided Transferability Estimation for Parameter-Efficient Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Khoba_2025_ICCV, author = {Khoba, Prafful Kumar and Wang, Zijian and Arora, Chetan and Baktashmotlagh, Mahsa}, title = {PEFTDiff: Diffusion-Guided Transferability Estimation for Parameter-Efficient Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1454-1463} }
CODA: Repurposing Continuous VAEs for Discrete Tokenization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Zeyu and Ni, Zanlin and Hua, Yeguo and Deng, Xin and Ma, Xiao and Zhong, Cheng and Huang, Gao}, title = {CODA: Repurposing Continuous VAEs for Discrete Tokenization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18906-18916} }
GeoAvatar: Adaptive Geometrical Gaussian Splatting for 3D Head Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2025_ICCV, author = {Moon, SeungJun and Lew, Hah Min and Lee, Seungeun and Kang, Ji-Su and Park, Gyeong-Moon}, title = {GeoAvatar: Adaptive Geometrical Gaussian Splatting for 3D Head Avatar}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12811-12821} }
Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Seungjin and Lee, Kanghee and Jeong, Yonghyun and Noh, Haeun and Lee, Jungmin and Choi, Jongwon}, title = {Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13372-13381} }
RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2025_ICCV, author = {Bang, Geonho and Seong, Minjae and Kim, Jisong and Baek, Geunju and Oh, Daye and Kim, Junhyung and Koh, Junho and Choi, Jun Won}, title = {RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25315-25324} }
Towards Annotation-Free Evaluation: KPAScore for Human Keypoint Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xiaoxiao and Li, Chunxiao and Sun, Peng and Miao, Boming and Zhang, Yunjian and Zhu, Yao}, title = {Towards Annotation-Free Evaluation: KPAScore for Human Keypoint Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8441-8450} }
TITAN: Query-Token based Domain Adaptive Adversarial Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashraf_2025_ICCV, author = {Ashraf, Tajamul and Bashir, Janibul}, title = {TITAN: Query-Token based Domain Adaptive Adversarial Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {250-262} }
AnyCalib: On-Manifold Learning for Model-Agnostic Single-View Camera Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Tirado-Garin_2025_ICCV, author = {Tirado-Gar{\'\i}n, Javier and Civera, Javier}, title = {AnyCalib: On-Manifold Learning for Model-Agnostic Single-View Camera Calibration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8044-8055} }
SEGA: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haoran and Zhao, Bo and Wang, Jinghui and Wang, Hanzhang and Yang, Huan and Ji, Wei and Liu, Hao and Xiao, Xinyan}, title = {SEGA: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19321-19330} }
GEOPARD: Geometric Pretraining for Articulation Prediction in 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goyal_2025_ICCV, author = {Goyal, Pradyumn and Petrov, Dmitry and Andrews, Sheldon and Ben-Shabat, Yizhak and Liu, Hsueh-Ti Derek and Kalogerakis, Evangelos}, title = {GEOPARD: Geometric Pretraining for Articulation Prediction in 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9332-9341} }
ViSpeak: Visual Instruction Feedback in Streaming Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Shenghao and Yang, Qize and Li, Yuan-Ming and Peng, Yi-Xing and Lin, Kun-Yu and Wei, Xihan and Hu, Jian-Fang and Xie, Xiaohua and Zheng, Wei-Shi}, title = {ViSpeak: Visual Instruction Feedback in Streaming Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21778-21788} }
Feature Coding in the Era of Large Models: Dataset, Test Conditions, and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Changsheng and Ma, Yifan and Chen, Qiaoxi and Xu, Yenan and Liu, Dong and Lin, Weisi}, title = {Feature Coding in the Era of Large Models: Dataset, Test Conditions, and Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1068-1077} }
VideoAds for Fast-Paced Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zheyuan and Dou, Wanying and Peng, Linkai and Pan, Hongyi and Bagci, Ulas and Gong, Boqing}, title = {VideoAds for Fast-Paced Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21812-21821} }
MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Qingyuan and Gong, Yuehu and Yang, Weidong and Li, Jiaze and Luo, Yeqi and Xu, Baixin and Li, Shuhao and Fei, Ben and He, Ying}, title = {MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27295-27304} }
Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cartella_2025_ICCV, author = {Cartella, Giuseppe and Cuculo, Vittorio and D'Amelio, Alessandro and Cornia, Marcella and Boccignone, Giuseppe and Cucchiara, Rita}, title = {Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16206-16216} }
Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Yuanhong and Yu, Ruixuan and Sun, Jian}, title = {Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10844-10853} }
I2V3D: Controllable Image-to-video Generation with 3D Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zhiyuan and Chen, Dongdong and Liao, Jing}, title = {I2V3D: Controllable Image-to-video Generation with 3D Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13360-13371} }
TAG-WM: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yuzhuo and Ma, Zehua and Fang, Han and Zhang, Weiming and Yu, Nenghai}, title = {TAG-WM: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16723-16732} }
MIEB: Massive Image Embedding Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Chenghao and Chung, Isaac and Kerboua, Imene and Stirling, Jamie and Zhang, Xin and Kardos, M{\'a}rton and Solomatin, Roman and Al Moubayed, Noura and Enevoldsen, Kenneth and Muennighoff, Niklas}, title = {MIEB: Massive Image Embedding Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22187-22198} }
IMoRe: Implicit Program-Guided Reasoning for Human Motion Q&A-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Chen and Sugandhika, Chinthani and Ee, Yeo Keat and Peh, Eric and Zhang, Hao and Yang, Hong and Rajan, Deepu and Fernando, Basura}, title = {IMoRe: Implicit Program-Guided Reasoning for Human Motion Q\&A}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12987-12996} }
SViM3D: Stable Video Material Diffusion for Single Image 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Engelhardt_2025_ICCV, author = {Engelhardt, Andreas and Boss, Mark and Voleti, Vikram and Yao, Chun-Han and Lensch, Hendrik P. A. and Jampani, Varun}, title = {SViM3D: Stable Video Material Diffusion for Single Image 3D Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28428-28439} }
VAGUE: Visual Contexts Clarify Ambiguous Expressions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_ICCV, author = {Nam, Heejeong and Ahn, Jinwoo and Ka, Keummin and Chung, Jiwan and Yu, Youngjae}, title = {VAGUE: Visual Contexts Clarify Ambiguous Expressions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1537-1547} }
Enhancing Numerical Prediction of MLLMs with Soft Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Modolo, Davide and Swaminathan, Ashwin}, title = {Enhancing Numerical Prediction of MLLMs with Soft Labeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3424-3434} }
Fine-Grained 3D Gaussian Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Yuan and Wang, Xuan and Wang, Cong and Zhang, WeiLi and Fan, Yanbo and Guo, Yu and Wang, Fei}, title = {Fine-Grained 3D Gaussian Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14293-14304} }
DynamicFace: High-Quality and Consistent Face Swapping for Image and Video using Composable 3D Facial Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Runqi and Chen, Yang and Xu, Sijie and He, Tianyao and Zhu, Wei and Song, Dejia and Chen, Nemo and Tang, Xu and Hu, Yao}, title = {DynamicFace: High-Quality and Consistent Face Swapping for Image and Video using Composable 3D Facial Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13438-13447} }
CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jungho and Kim, Donghyeong and Lee, Dogyoon and Cho, Suhwan and Lee, Minhyeok and Lee, Wonjoon and Kim, Taeoh and Wee, Dongyoon and Lee, Sangyoun}, title = {CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26415-26424} }
Open-Unfairness Adversarial Mitigation for Generalized Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhaoyang and Teng, Zhu and Zhang, Baopeng and Fan, Jianping}, title = {Open-Unfairness Adversarial Mitigation for Generalized Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {698-707} }
Recognizing Actions from Robotic View for Natural Human-Robot Interaction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ziyi and Li, Peiming and Liu, Hong and Deng, Zhichao and Wang, Can and Liu, Jun and Yuan, Junsong and Liu, Mengyuan}, title = {Recognizing Actions from Robotic View for Natural Human-Robot Interaction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14218-14227} }
WIPES: Wavelet-based Visual Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wenhao and Zhu, Hao and Wu, Delong and Kang, Di and Bao, Linchao and Cao, Xun and Ma, Zhan}, title = {WIPES: Wavelet-based Visual Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27338-27347} }
Frequency Domain-Based Diffusion Model for Unpaired Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Chengxu and Qi, Lu and Pan, Jinshan and Qian, Xueming and Yang, Ming-Hsuan}, title = {Frequency Domain-Based Diffusion Model for Unpaired Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7538-7547} }
Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint-
[pdf]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Wentian and Weng, Weizhao and Huang, Zihao and Chen, Yandan and Huang, Siquan and Gao, Ping and Leung, Victor C. M. and Gao, Ying}, title = {Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23332-23341} }
SAGI: Semantically Aligned and Uncertainty Guided AI Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Giakoumoglou_2025_ICCV, author = {Giakoumoglou, Paschalis and Karageorgiou, Dimitrios and Papadopoulos, Symeon and Petrantonakis, Panagiotis C.}, title = {SAGI: Semantically Aligned and Uncertainty Guided AI Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16090-16101} }
Adversarial Purification via Super-Resolution and Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Mincheol and Park, Cheonjun and Lim, Seungseop and Koo, Mijin and Lee, Hyunwuk and Ro, Won Woo and Kim, Suhyun}, title = {Adversarial Purification via Super-Resolution and Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4605-4615} }
RAGDiffusion: Faithful Cloth Generation via External Knowledge Assimilation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuhan and Tan, Xianfeng and Shang, Wenxiang and Wu, Yubo and Wang, Jian and Chen, Xuanhong and Zhang, Yi and Zhu, Hangcheng and Ni, Bingbing}, title = {RAGDiffusion: Faithful Cloth Generation via External Knowledge Assimilation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17485-17495} }
Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2025_ICCV, author = {Mu, Juncheng and Ren, Chengwei and Zhang, Weixiang and Pan, Liang and Zhang, Xiao-Ping and Gao, Yue}, title = {Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25777-25787} }
Dynamic Typography: Bringing Text to Life via Video Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Zichen and Meng, Yihao and Ouyang, Hao and Yu, Yue and Zhao, Bolin and Cohen-Or, Daniel and Qu, Huamin}, title = {Dynamic Typography: Bringing Text to Life via Video Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14787-14797} }
LightSwitch: Multi-view Relighting with Material-guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Litman_2025_ICCV, author = {Litman, Yehonathan and De la Torre, Fernando and Tulsiani, Shubham}, title = {LightSwitch: Multi-view Relighting with Material-guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27750-27759} }
CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Miaowei and Li, Changjian and Vaxman, Amir}, title = {CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28587-28598} }
Occlusion-robust Stylization for Drawing-based 3D Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2025_ICCV, author = {Yoon, Sunjae and Koo, Gwanhyeong and Lee, Younghwan and Hong, Ji Woo and Yoo, Chang D.}, title = {Occlusion-robust Stylization for Drawing-based 3D Animation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12263-12273} }
Towards Foundational Models for Single-Chip Radar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Tianshu and Prabhakara, Akarsh and Chen, Chuhan and Karhade, Jay and Ramanan, Deva and O'toole, Matthew and Rowe, Anthony}, title = {Towards Foundational Models for Single-Chip Radar}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24655-24665} }
Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Pang_2025_ICCV, author = {Pang, Zhiqi and Wang, Chunyu and Zhao, Lingling and Wang, Junjie}, title = {Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11100-11109} }
Spatial Alignment and Temporal Matching Adapter for Video-Radar Remote Physiological Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Qian and Geng, Ruixu and Chen, Jinbo and Wang, Haoyu and Chen, Yan and Hu, Yang}, title = {Spatial Alignment and Temporal Matching Adapter for Video-Radar Remote Physiological Measurement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8623-8633} }
StableCodec: Taming One-Step Diffusion for Extreme Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Tianyu and Luo, Xin and Li, Li and Liu, Dong}, title = {StableCodec: Taming One-Step Diffusion for Extreme Image Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17379-17389} }
Seal Your Backdoor with Variational Defense-
[pdf]
[supp]
[bibtex]@InProceedings{Sabolic_2025_ICCV, author = {Saboli\'c, Ivan and Grci\'c, Matej and \v{S}egvi\'c, Sini\v{s}a}, title = {Seal Your Backdoor with Variational Defense}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {752-764} }
From Abyssal Darkness to Blinding Glare: A Benchmark on Extreme Exposure Correction in Real World-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Bo and Fu, Huiyuan and Huang, Zhiye and Zhang, Siru and Wang, Xin and Ma, Huadong}, title = {From Abyssal Darkness to Blinding Glare: A Benchmark on Extreme Exposure Correction in Real World}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7666-7675} }
MVTrajecter: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamane_2025_ICCV, author = {Yamane, Taiga and Masumura, Ryo and Suzuki, Satoshi and Orihashi, Shota}, title = {MVTrajecter: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13270-13280} }
GeoDiffusion: A Training-Free Framework for Accurate 3D Geometric Conditioning in Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Mueller_2025_ICCV, author = {Mueller, Phillip and Uenlue, Talip and Schmidt, Sebastian and Kollovieh, Marcel and Fan, Jiajie and G\"unnemann, Stephan and Mikelsons, Lars}, title = {GeoDiffusion: A Training-Free Framework for Accurate 3D Geometric Conditioning in Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6374-6384} }
VALLR: Visual ASR Language Model for Lip Reading-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thomas_2025_ICCV, author = {Thomas, Marshall and Fish, Edward and Bowden, Richard}, title = {VALLR: Visual ASR Language Model for Lip Reading}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2846-2856} }
Learning Efficient and Generalizable Human Representation with Human Gaussian Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yifan and Zhang, Shengjun and Dai, Chensheng and Chen, Yang and Liu, Hao and Li, Chen and Duan, Yueqi}, title = {Learning Efficient and Generalizable Human Representation with Human Gaussian Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11797-11806} }
DexH2R: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Youzhuo and Ye, Jiayi and Xiao, Chuyang and Zhong, Yiming and Tao, Heng and Yu, Hang and Liu, Yumeng and Yu, Jingyi and Ma, Yuexin}, title = {DexH2R: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12702-12712} }
Mobile Video Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ben_Yahia_2025_ICCV, author = {Ben Yahia, Haitam and Korzhenkov, Denis and Lelekas, Ioannis and Ghodrati, Amir and Habibian, Amirhossein}, title = {Mobile Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19450-19460} }
Importance-Based Token Merging for Efficient Image and Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Haoyu and Xu, Jingyi and Le, Hieu and Samaras, Dimitris}, title = {Importance-Based Token Merging for Efficient Image and Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4983-4995} }
Semi-ViM: Bidirectional State Space Model for Mitigating Label Imbalance in Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Hongyang and Xie, Hongyang and You, Haochen and Sanchez, Victor}, title = {Semi-ViM: Bidirectional State Space Model for Mitigating Label Imbalance in Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {765-774} }
Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Peng and Li, Hui and Xu, Han and Jeon, Paul Barom and Lee, Dongwook and Ji, Daehyun and Yang, Ran and Zhu, Feng}, title = {Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19700-19710} }
Towards Human-like Virtual Beings: Simulating Human Behavior in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Chen and Wang, Wenguan and Yang, Yi}, title = {Towards Human-like Virtual Beings: Simulating Human Behavior in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10753-10763} }
HiGarment: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Junyi and Zhang, Jingxuan and Wu, Fangyu and Lu, Huanda and Wang, Qiufeng and Yang, Wenmian and Lim, Eng Gee and Lu, Dongming}, title = {HiGarment: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18542-18551} }
Robust Unfolding Network for HDR Imaging with Modulo Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhile and Ji, Hui}, title = {Robust Unfolding Network for HDR Imaging with Modulo Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25218-25228} }
Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xi_2025_ICCV, author = {Xi, Zeyu and Sun, Haoying and Wu, Yaofei and Yan, Junchi and Zhang, Haoran and Wu, Lifang and Wang, Liang and Chen, Changwen}, title = {Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24330-24339} }
ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios-
[pdf]
[bibtex]@InProceedings{Yin_2025_ICCV, author = {Yin, Jun and Zeng, Pengyu and Shen, Licheng and Zhang, Miao and Zhong, Jing and Han, Yuxing and Lu, Shuai}, title = {ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26004-26014} }
Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Chao and Wei, Tianyi and Yu, Nenghai}, title = {Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15171-15181} }
From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Sen and Zeng, Shao and Gu, Tianjun and Zhang, Zhizhong and Zhang, Ruixin and Ding, Shouhong and Zhang, Jingyun and Wang, Jun and Tan, Xin and Xie, Yuan and Ma, Lizhuang}, title = {From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13804-13814} }
Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koh_2025_ICCV, author = {Koh, Eunseo and Hong, Seunghoo and Kim, Tae-Young and Woo, Simon S. and Heo, Jae-Pil}, title = {Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15365-15374} }
ARMO: Autoregressive Rigging for Multi-Category Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Mingze and Mao, Shiwei and Chen, Keyi and Chen, Yurun and Lu, Shunlin and Wang, Jingbo and Dong, Junting and Huang, Ruqi}, title = {ARMO: Autoregressive Rigging for Multi-Category Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7721-7730} }
DIP: Unsupervised Dense In-Context Post-training of Visual Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sirko-Galouchenko_2025_ICCV, author = {Sirko-Galouchenko, Sophia and Gidaris, Spyros and Vobecky, Antonin and Bursuc, Andrei and Thome, Nicolas}, title = {DIP: Unsupervised Dense In-Context Post-training of Visual Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4264-4274} }
Think Twice: Test-Time Reasoning for Robust CLIP Zero-Shot Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Shenyu and Pan, Zhaoying and Wang, Xiaoqian}, title = {Think Twice: Test-Time Reasoning for Robust CLIP Zero-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2919-2929} }
Training-free Geometric Image Editing on Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Hanshen and Zhu, Zhen and Zhang, Kaile and Gong, Yiming and Liu, Yuliang and Bai, Xiang}, title = {Training-free Geometric Image Editing on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19130-19140} }
HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiyu and Ma, Jiayi and Guo, Jianwei and Hu, Wei and Qi, Zhaoshuai and Hui, Fei and Yang, Jiaqi and Zhang, Yanning}, title = {HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24750-24759} }
Dark-ISP: Enhancing RAW Image Processing for Low-Light Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Jiasheng and Gao, Xin and Yan, Yuxiang and Li, Guanghao and Pu, Jian}, title = {Dark-ISP: Enhancing RAW Image Processing for Low-Light Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9583-9593} }
Harmonizing Visual Representations for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Size and Zhang, Wenwei and Xu, Lumin and Jin, Sheng and Wu, Zhonghua and Tao, Qingyi and Liu, Wentao and Li, Wei and Loy, Chen Change}, title = {Harmonizing Visual Representations for Unified Multimodal Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17739-17750} }
Environment-Agnostic Pose: Generating Environment-independent Object Representations for 6D Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shaobo and Huang, Yuhang and Zhao, Wanqing and Zhao, Wei and Guan, Ziyu and Peng, Jinye}, title = {Environment-Agnostic Pose: Generating Environment-independent Object Representations for 6D Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8678-8687} }
UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Perez_2025_ICCV, author = {Perez, Fabian and Rojas, Sara and Hinojosa, Carlos and Rueda-Chac\'on, Hoover and Ghanem, Bernard}, title = {UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26284-26293} }
Progressive Homeostatic and Plastic Prompt Tuning for Audio-Visual Multi-Task Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_ICCV, author = {Yin, Jiong and Li, Liang and Zhang, Jiehua and Gao, Yuhan and Yan, Chenggang and Sheng, Xichun}, title = {Progressive Homeostatic and Plastic Prompt Tuning for Audio-Visual Multi-Task Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2022-2033} }
Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elsner_2025_ICCV, author = {Elsner, Tim and Usinger, Paula and Nehring-Wirxel, Julius and Kobsik, Gregor and Czech, Victor and He, Yanjiang and Lim, Isaak and Kobbelt, Leif}, title = {Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21331-21341} }
All in One: Visual-Description-Guided Unified Point Cloud Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Zongyan and El Amine Boudjoghra, Mohamed and Dong, Jiahua and Wang, Jinhong and Anwer, Rao Muhammad}, title = {All in One: Visual-Description-Guided Unified Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24835-24845} }
Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts-
[pdf]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Yanguang and Lian, Jiawei and Yang, Jian and Luo, Lei}, title = {Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22327-22337} }
Frequency-Dynamic Attention Modulation For Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Linwei and Gu, Lin and Fu, Ying}, title = {Frequency-Dynamic Attention Modulation For Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22620-22632} }
Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_ICCV, author = {Cui, Yongchuan and Liu, Peng and Zhang, Hui}, title = {Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11850-11860} }
Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Yuan and Wang, Shuo and Zhang, Rongzhao and Chen, Zijian and Jiang, Yankai and Li, Chunyi and Zhu, Xiangyang and Yan, Fang and Hu, Qiang and Wang, XiaoSong and Zhai, Guangtao}, title = {Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20613-20625} }
DEPTHOR: Depth Enhancement from a Practical Light-Weight dToF Sensor and RGB Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Jijun and Zhu, Xuan and Wang, Xianqi and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin}, title = {DEPTHOR: Depth Enhancement from a Practical Light-Weight dToF Sensor and RGB Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6101-6111} }
Describe Anything: Detailed Localized Image and Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2025_ICCV, author = {Lian, Long and Ding, Yifan and Ge, Yunhao and Liu, Sifei and Mao, Hanzi and Li, Boyi and Pavone, Marco and Liu, Ming-Yu and Darrell, Trevor and Yala, Adam and Cui, Yin}, title = {Describe Anything: Detailed Localized Image and Video Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21766-21777} }
Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhimin and Chen, Xuewei and Guo, Xiao and Li, Yingwei and Jing, Longlong and Yang, Liang and Li, Bing}, title = {Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27618-27629} }
Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jie and Shen, Jiayi and Zhou, Pan and Sonke, Jan-Jakob and Gavves, Efstratios}, title = {Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21155-21165} }
KV-Edit: Training-Free Image Editing for Precise Background Preservation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Tianrui and Zhang, Shiyi and Shao, Jiawei and Tang, Yansong}, title = {KV-Edit: Training-Free Image Editing for Precise Background Preservation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16607-16617} }
Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuanze and Yuan, Shihao and Wang, Haolin and Li, Qizhang and Liu, Ming and Xu, Chen and Shi, Guangming and Zuo, Wangmeng}, title = {Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21917-21926} }
When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Bo-Lun and Ni, Zi-Xiang and Huang, Feng-Kai and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27917-27926} }
Dream-to-Recon: Monocular 3D Reconstruction with Diffusion-Depth Distillation from Single Images-
[pdf]
[supp]
[bibtex]@InProceedings{Wulff_2025_ICCV, author = {Wulff, Philipp and Wimbauer, Felix and Muhle, Dominik and Cremers, Daniel}, title = {Dream-to-Recon: Monocular 3D Reconstruction with Diffusion-Depth Distillation from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9352-9362} }
Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Junhao and Wei, Yang and Wang, Jingyu and Wang, Yongchao and Bi, Xiuli and Xiao, Bin}, title = {Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24413-24422} }
SplArt: Articulation Estimation and Part-Level Reconstruction with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Shengjie and Fang, Jiading and Irshad, Muhammad Zubair and Guizilini, Vitor Campagnolo and Ambrus, Rares Andrei and Shakhnarovich, Greg and Walter, Matthew R.}, title = {SplArt: Articulation Estimation and Part-Level Reconstruction with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8841-8851} }
FedXDS: Leveraging Model Attribution Methods to counteract Data Heterogeneity in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hoefler_2025_ICCV, author = {Hoefler, Maximilian Andreas and Mueller, Karsten and Samek, Wojciech}, title = {FedXDS: Leveraging Model Attribution Methods to counteract Data Heterogeneity in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4572-4581} }
Retinex-MEF: Retinex-based Glare Effects Aware Unsupervised Multi-Exposure Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_ICCV, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Deng, Lilun and Cui, Yukun and Xu, Shuang}, title = {Retinex-MEF: Retinex-based Glare Effects Aware Unsupervised Multi-Exposure Image Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7251-7261} }
Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Guanyi and Wang, Ziyue and Shen, Daiyun and Liu, Haofeng and Zhou, Hantao and Wu, Junde and Hu, Runze and Jin, Yueming}, title = {Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14431-14442} }
Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Yupeng and Ding, Changxing and Sun, Chang and Huang, Shaoli and Xu, Xiangmin}, title = {Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20126-20136} }
GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bolduc_2025_ICCV, author = {Bolduc, Christophe and Hold-Geoffroy, Yannick and Lalonde, Jean-Fran\c{c}ois}, title = {GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29120-29130} }
Hate in Plain Sight: On the Risks of Moderating AI-Generated Hateful Illusions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Yiting and Yang, Ziqing and Ma, Yihan and Backes, Michael and Zannettou, Savvas and Zhang, Yang}, title = {Hate in Plain Sight: On the Risks of Moderating AI-Generated Hateful Illusions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19617-19627} }
Steering Guidance for Personalized Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Sunghyun and Choi, Seokeon and Park, Hyoungwoo and Yun, Sungrack}, title = {Steering Guidance for Personalized Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15907-15916} }
Improving Rectified Flow with Boundary Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Xixi and Liao, Runlong and Xu, Keyang and Liu, Bo and Li, Yeqing and Ie, Eugene and Fei, Hongliang and Liu, Qiang}, title = {Improving Rectified Flow with Boundary Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18177-18186} }
ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2025_ICCV,
  author    = {Yeo, Juan and Cha, Soonwoo and Song, Jiwoo and Jin, Hyunbin and Kim, Taesup},
  title     = {{ATAS}: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20390--20400}
}
Multimodal Prompt Alignment for Facial Expression Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Fuyan and He, Yiran and Sun, Bin and Li, Shutao},
  title     = {Multimodal Prompt Alignment for Facial Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12581--12591}
}
Laboring on less labors: RPCA Paradigm for Pan-sharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Honghui and Fang, Chuangjie and Wang, Yibin and Wu, Jie and Zheng, Jianwei},
  title     = {Laboring on less labors: {RPCA} Paradigm for Pan-sharpening},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11393--11402}
}
MVQA: Mamba with Unified Sampling for Efficient Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2025_ICCV,
  author    = {Mi, Yachun and Li, Yu and Meng, Weicheng and Chen, Chaofeng and Hui, Chen and Liu, Shaohui},
  title     = {{MVQA}: {Mamba} with Unified Sampling for Efficient Video Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18498--18509}
}
Semantic-guided Camera Ray Regression for Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yesheng and Zhao, Xu},
  title     = {Semantic-guided Camera Ray Regression for Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25639--25648}
}
HAMSt3R: Human-Aware Multi-view Stereo 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Rojas_2025_ICCV,
  author    = {Rojas, Sara and Armando, Matthieu and Ghanem, Bernard and Weinzaepfel, Philippe and Leroy, Vincent and Rogez, Gr{\'e}gory},
  title     = {{HAMSt3R}: Human-Aware Multi-view Stereo {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5027--5037}
}
TrajectoryCrafter: Redirecting Camera Trajectory for Monocular Videos via Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Mark and Hu, Wenbo and Xing, Jinbo and Shan, Ying},
  title     = {{TrajectoryCrafter}: Redirecting Camera Trajectory for Monocular Videos via Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {100--111}
}
AV-Flow: Transforming Text to Audio-Visual Human-like Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Chatziagapi_2025_ICCV,
  author    = {Chatziagapi, Aggelina and Morency, Louis-Philippe and Gong, Hongyu and Zollh{\"o}fer, Michael and Samaras, Dimitris and Richard, Alexander},
  title     = {{AV-Flow}: Transforming Text to Audio-Visual Human-like Interactions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14270--14282}
}
Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jeongmin and Kim, Susang and Lee, Kisu and Kwon, Taekyoung and Shin, Won-Yong and Kim, Ha Young},
  title     = {Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21117--21128}
}
Self-Supervised Monocular 4D Scene Reconstruction for Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Chengbo and Chen, Geng and Yi, Li and Gao, Yang},
  title     = {Self-Supervised Monocular {4D} Scene Reconstruction for Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8863--8874}
}
What we need is explicit controllability: Training 3D gaze estimator using only facial images-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Tingwei and Bao, Jun and Kuang, Zhenzhong and Liu, Buyu},
  title     = {What we need is explicit controllability: Training {3D} gaze estimator using only facial images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11414--11424}
}
WaveMamba: Wavelet-Driven Mamba Fusion for RGB-Infrared Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haodong and Dong, Wenhao and Yang, Linlin and Li, Hong and Yang, Yuguang and Ren, Yangyang and Zhu, Qingcheng and Feng, Zichao and Li, Changbai and Lin, Shaohui and Wang, Runqi and Luo, Xiaoyan and Zhang, Baochang},
  title     = {{WaveMamba}: Wavelet-Driven {Mamba} Fusion for {RGB}-Infrared Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11219--11229}
}
CoST: Efficient Collaborative Perception From Unified Spatiotemporal Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Zongheng and Liu, Yi and Sun, Yifan and Gao, Yulu and Chen, Jinyu and Xu, Runsheng and Liu, Si},
  title     = {{CoST}: Efficient Collaborative Perception From Unified Spatiotemporal Perspective},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1120--1129}
}
GCRayDiffusion: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Li-Heng and Zou, Zi-Xin and Liu, Chang and Jing, Tianjiao and Cao, Yan-Pei and Huang, Shi-Sheng and Fu, Hongbo and Huang, Hua},
  title     = {{GCRayDiffusion}: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25335--25345}
}
AU-Blendshape for Fine-grained Stylized 3D Facial Expression Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Hao and Dai, Ju and Zhou, Feng and Ning, Kaida and Li, Lei and Pan, Junjun},
  title     = {{AU-Blendshape} for Fine-grained Stylized {3D} Facial Expression Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12605--12614}
}
Noise-Modeled Diffusion Models for Low-Light Spike Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ruonan and Zhu, Lin and Xiang, Xijie and Wang, Lizhi and Huang, Hua},
  title     = {Noise-Modeled Diffusion Models for Low-Light Spike Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {4080--4089}
}
Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Peng and Wang, Junke and Chang, Yi and Yu, Yizhou and Ma, Rui and Wu, Zuxuan},
  title     = {Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17390--17400}
}
MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ziyan and Hu, Zeyu and Soh, De Wen and Zhao, Na},
  title     = {{MotionLab}: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13869--13879}
}
Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Byung Hyun and Jeong, Wongi and Han, Woojae and Lee, Kyoungbun and Chun, Se Young},
  title     = {Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23232--23242}
}
DisCoRD: Discrete Tokens to Continuous Motion via Rectified Flow Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Jungbin and Kim, Junwan and Kim, Jisoo and Kim, Minseo and Kang, Mingu and Hong, Sungeun and Oh, Tae-Hyun and Yu, Youngjae},
  title     = {{DisCoRD}: Discrete Tokens to Continuous Motion via Rectified Flow Decoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14602--14612}
}
TREAD: Token Routing for Efficient Architecture-agnostic Diffusion Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krause_2025_ICCV,
  author    = {Krause, Felix and Phan, Timy and Gui, Ming and Baumann, Stefan Andreas and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {{TREAD}: Token Routing for Efficient Architecture-agnostic Diffusion Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15703--15713}
}
Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Juntao and Shen, Wen and Wei, Zhihua and Sun, Lijun and Zhang, Hongyun},
  title     = {Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20413--20424}
}
Controllable Weather Synthesis and Removal with Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chih-Hao and Wang, Zian and Liang, Ruofan and Zhang, Yuxuan and Fidler, Sanja and Wang, Shenlong and Gojcic, Zan},
  title     = {Controllable Weather Synthesis and Removal with Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13580--13591}
}
Bias in Gender Bias Benchmarks: How Spurious Features Distort Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hirota_2025_ICCV,
  author    = {Hirota, Yusuke and Hachiuma, Ryo and Li, Boyi and Lu, Ximing and Boone, Michael Ross and Ivanovic, Boris and Choi, Yejin and Pavone, Marco and Wang, Yu-Chiang Frank and Garcia, Noa and Nakashima, Yuta and Yang, Chao-Han Huck},
  title     = {Bias in Gender Bias Benchmarks: How Spurious Features Distort Evaluation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8634--8644}
}
Clink! Chop! Thud! - Learning Object Sounds from Real-World Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Mengyu and Chen, Yiming and Pei, Haozheng and Agarwal, Siddhant and Vasudevan, Arun Balajee and Hays, James},
  title     = {Clink! {Chop}! {Thud}! - Learning Object Sounds from Real-World Interactions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14549--14558}
}
Instant GaussianImage: A Generalizable and Self-Adaptive Image Representation via 2D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Zhaojie and Wang, Yuesong and Guan, Tao and Yang, Chao and Ju, Lili},
  title     = {Instant {GaussianImage}: A Generalizable and Self-Adaptive Image Representation via {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27896--27905}
}
Learning to Inference Adaptively for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhuoyan and Nguyen, Khoi Duc and Mukherjee, Preeti and Bagchi, Saurabh and Chaterji, Somali and Liang, Yingyu and Li, Yin},
  title     = {Learning to Inference Adaptively for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3552--3563}
}
HazeFlow: Revisit Haze Physical Model as ODE and Non-Homogeneous Haze Generation for Real-World Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Junseong and Chung, Seungwoo and Yang, Yunjeong and Kim, Tae Hyun},
  title     = {{HazeFlow}: Revisit Haze Physical Model as {ODE} and Non-Homogeneous Haze Generation for Real-World Dehazing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6263--6272}
}
AcZeroTS: Active Learning for Zero-shot Tissue Segmentation in Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiao and Zhou, Junjie and Qian, Bo and Wan, Peng and Zuo, Yingli and Shao, Wei and Zhang, Daoqiang},
  title     = {{AcZeroTS}: Active Learning for Zero-shot Tissue Segmentation in Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23508--23518}
}
FaceShield: Defending Facial Image against Deepfake Threats-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Jaehwan and In, Sumin and Kim, Sieun and Shin, Hannie and Jeong, Jongheon and Yoon, Sang Ho and Chung, Jaewook and Kim, Sangpil},
  title     = {{FaceShield}: Defending Facial Image against Deepfake Threats},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10364--10374}
}
SEAL: Semantic Aware Image Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Arabi_2025_ICCV,
  author    = {Arabi, Kasra and Witter, R. Teal and Hegde, Chinmay and Cohen, Niv},
  title     = {{SEAL}: Semantic Aware Image Watermarking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16196--16205}
}
Modeling Saliency Dataset Bias-
[pdf]
[supp]
[bibtex]@InProceedings{Kummerer_2025_ICCV,
  author    = {K{\"u}mmerer, Matthias and Khanuja, Harneet Singh and Bethge, Matthias},
  title     = {Modeling Saliency Dataset Bias},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22077--22088}
}
MCOP: Multi-UAV Collaborative Occupancy Prediction-
[pdf]
[bibtex]@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zefu and Chen, Wenbo and Jin, Xiaojuan and Yang, Yuran and Fan, Lue and Zhang, Yixin and Zhang, Yufeng and Zhang, Zhaoxiang},
  title     = {{MCOP}: Multi-{UAV} Collaborative Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27242--27251}
}
Forecasting Continuous Non-Conservative Dynamical Systems in SO(3)-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bastian_2025_ICCV,
  author    = {Bastian, Lennart and Rashed, Mohammad and Navab, Nassir and Birdal, Tolga},
  title     = {Forecasting Continuous Non-Conservative Dynamical Systems in {SO(3)}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14845--14855}
}
HoliTracer: Holistic Vectorization of Geographic Objects from Large-Size Remote Sensing Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yu and Dang, Bo and Li, Wanchun and Chen, Wei and Li, Yansheng},
  title     = {{HoliTracer}: Holistic Vectorization of Geographic Objects from Large-Size Remote Sensing Imagery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8482--8491}
}
Tensor-aggregated LoRA in Federated Fine-tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Xu, Binqian and Shu, Xiangbo and Zhang, Jiachao and Yao, Yazhou and Xie, Guo-Sen and Tang, Jinhui},
  title     = {Tensor-aggregated {LoRA} in Federated Fine-tuning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1058--1067}
}
TextSSR: Diffusion-based Data Synthesis for Scene Text Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Xingsong and Du, Yongkun and Tao, Yunbo and Chen, Zhineng},
  title     = {{TextSSR}: Diffusion-based Data Synthesis for Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17464--17473}
}
OVA-Fields: Weakly Supervised Open-Vocabulary Affordance Fields for Robot Operational Part Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV,
  author    = {Su, Heng and Xie, Mengying and Cao, Nieqing and Ding, Yan and Shao, Beichen and Long, Xianlei and Gu, Fuqiang and Chen, Chao},
  title     = {{OVA-Fields}: Weakly Supervised Open-Vocabulary Affordance Fields for Robot Operational Part Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6385--6395}
}
Tree Skeletonization from 3D Point Clouds by Denoising Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Marks_2025_ICCV,
  author    = {Marks, Elias Ariel and Nunes, Lucas and Magistri, Federico and Sodano, Matteo and Marcuzzi, Rodrigo and Zimmermann, Lars and Behley, Jens and Stachniss, Cyrill},
  title     = {Tree Skeletonization from {3D} Point Clouds by Denoising Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27607--27617}
}
DiMPLe - Disentangled Multi-Modal Prompt Learning: Enhancing Out-Of-Distribution Alignment with Invariant and Spurious Feature Separation-
[pdf]
[supp]
[bibtex]@InProceedings{Rahman_2025_ICCV,
  author    = {Rahman, Umaima and Yaqub, Mohammad and Mahapatra, Dwarikanath},
  title     = {{DiMPLe} - Disentangled Multi-Modal Prompt Learning: Enhancing Out-Of-Distribution Alignment with Invariant and Spurious Feature Separation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {1634--1643}
}
From Panels to Prose: Generating Literary Narratives from Comics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sachdeva_2025_ICCV,
  author    = {Sachdeva, Ragav and Zisserman, Andrew},
  title     = {From Panels to Prose: Generating Literary Narratives from Comics},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21864--21873}
}
Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taioli_2025_ICCV,
  author    = {Taioli, Francesco and Zorzi, Edoardo and Franchi, Gianni and Castellini, Alberto and Farinelli, Alessandro and Cristani, Marco and Wang, Yiming},
  title     = {Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18781--18792}
}
VMem: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Runjia and Torr, Philip and Vedaldi, Andrea and Jakab, Tomas},
  title     = {{VMem}: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25690--25699}
}
Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Mingyuan and Fang, Zheng and Wang, Jiaxu and Zhang, Kunyi and Zhang, Qiang and Xu, Renjing},
  title     = {Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28473--28482}
}
X-Capture: An Open-Source Portable Device for Multi-Sensory Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Clarke_2025_ICCV,
  author    = {Clarke, Samuel and Wistreich, Suzannah and Ze, Yanjie and Wu, Jiajun},
  title     = {{X-Capture}: An Open-Source Portable Device for Multi-Sensory Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6436--6446}
}
Towards Accurate and Efficient 3D Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Linshen and Su, Boyan and Jiang, Junyue and Wu, Guanlin and Guo, Cong and Xu, Ceyu and Yang, Hao Frank},
  title     = {Towards Accurate and Efficient {3D} Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25903--25913}
}
UrbanLLaVA: A Multi-modal Large Language Model for Urban Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Jie and Wang, Shengyuan and Liu, Tianhui and Xi, Yanxin and Li, Yong},
  title     = {{UrbanLLaVA}: A Multi-modal Large Language Model for Urban Intelligence},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {6209--6219}
}
TITAN-Guide: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Simon_2025_ICCV,
  author    = {Simon, Christian and Ishii, Masato and Hayakawa, Akio and Zhong, Zhi and Takahashi, Shusuke and Shibuya, Takashi and Mitsufuji, Yuki},
  title     = {{TITAN-Guide}: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16662--16671}
}
ART: Adaptive Relation Tuning for Generalized Relation Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sudhakaran_2025_ICCV,
  author    = {Sudhakaran, Gopika and Shindo, Hikaru and Schramowski, Patrick and Schaub-Meyer, Simone and Kersting, Kristian and Roth, Stefan},
  title     = {{ART}: Adaptive Relation Tuning for Generalized Relation Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16323--16332}
}
VLM4D: Towards Spatiotemporal Awareness in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Shijie and Vilesov, Alexander and He, Xuehai and Wan, Ziyu and Zhang, Shuwang and Nagachandra, Aditya and Chang, Di and Chen, Dongdong and Wang, Xin Eric and Kadambi, Achuta},
  title     = {{VLM4D}: Towards Spatiotemporal Awareness in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {8600--8612}
}
DimensionX: Create Any 3D and 4D Scenes from a Single Image with Decoupled Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Wenqiang and Chen, Shuo and Liu, Fangfu and Chen, Zilong and Duan, Yueqi and Zhu, Jun and Zhang, Jun and Wang, Yikai},
  title     = {{DimensionX}: Create Any {3D} and {4D} Scenes from a Single Image with Decoupled Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13695--13706}
}
Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiao and Wang, Di and Jiao, Zhicheng and Li, Ronghan and Yang, Pengfei and Wang, Quan and Chua, Tat-Seng},
  title     = {Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21144--21154}
}
Mitigating Geometric Degradation in Fast DownSampling via FastAdapter for Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shuofeng and Yan, Haibin},
  title     = {Mitigating Geometric Degradation in Fast {DownSampling} via {FastAdapter} for Point Cloud Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25983--25992}
}
Dual-S3D: Hierarchical Dual-Path Selective SSM-CNN for High-Fidelity Implicit Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Luoxi and Shrestha, Pragyan and Zhou, Yu and Xie, Chun and Kitahara, Itaru},
  title     = {{Dual-S3D}: Hierarchical Dual-Path Selective {SSM-CNN} for High-Fidelity Implicit Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25104--25113}
}
SparseFlex: High-Resolution and Arbitrary-Topology 3D Shape Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Xianglong and Zou, Zi-Xin and Chen, Chia-Hao and Guo, Yuan-Chen and Liang, Ding and Yuan, Chun and Ouyang, Wanli and Cao, Yan-Pei and Li, Yangguang},
  title     = {{SparseFlex}: High-Resolution and Arbitrary-Topology {3D} Shape Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14822--14833}
}
Multimodal LLM Guided Exploration and Active Mapping using Fisher Information-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Wen and Lei, Boshu and Ashton, Katrina and Daniilidis, Kostas},
  title     = {Multimodal {LLM} Guided Exploration and Active Mapping using {Fisher} Information},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {5392--5404}
}
Lidar Waveforms are Worth 40x128x33 Words-
[pdf]
[supp]
[bibtex]@InProceedings{Scheuble_2025_ICCV,
  author    = {Scheuble, Dominik and Holzh{\"u}ter, Hanno and Peters, Steven and Bijelic, Mario and Heide, Felix},
  title     = {Lidar Waveforms are Worth 40x128x33 Words},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28913--28924}
}
LoRA.rar: Learning to Merge LoRAs via Hypernetworks for Subject-Style Conditioned Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shenaj_2025_ICCV,
  author    = {Shenaj, Donald and Bohdal, Ondrej and Ozay, Mete and Zanuttigh, Pietro and Michieli, Umberto},
  title     = {{LoRA.rar}: Learning to Merge {LoRAs} via Hypernetworks for Subject-Style Conditioned Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16132--16142}
}
X-Fusion: Introducing New Modality to Frozen Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Mo_2025_ICCV,
  author    = {Mo, Sicheng and Nguyen, Thao and Huang, Xun and Iyer, Siddharth Srinivasan and Li, Yijun and Liu, Yuchen and Tandon, Abhishek and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng},
  title     = {{X-Fusion}: Introducing New Modality to Frozen Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {228--238}
}
KOEnsAttack: Towards Efficient Data-Free Black-Box Adversarial Attacks via Knowledge-Orthogonalized Substitute Ensembles-
[pdf]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chaoyong and Yin, Jia-Li and Chen, Bin and Hu, Zhaozhe and Liu, Xiaolei and Lin, Wei},
  title     = {{KOEnsAttack}: Towards Efficient Data-Free Black-Box Adversarial Attacks via Knowledge-Orthogonalized Substitute Ensembles},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {3101--3110}
}
Multimodal Large Language Model-Guided ISP Hyperparameter Optimization with Dynamic Preference Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xinyu and Zhao, Zhikun and Lang, Congyan and Li, Bing and Wang, Juan},
  title     = {Multimodal Large Language Model-Guided {ISP} Hyperparameter Optimization with Dynamic Preference Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {437--446}
}
SALAD -- Semantics-Aware Logical Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Fucka_2025_ICCV,
  author    = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel},
  title     = {{SALAD} -- Semantics-Aware Logical Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21843--21852}
}
Federated Continuous Category Discovery and Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Lixu and Liu, Chenxi and Guo, Junfeng and Ye, Qingqing and Huang, Heng and Hu, Haibo and Dong, Wei},
  title     = {Federated Continuous Category Discovery and Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {2429--2439}
}
Motion Synthesis with Sparse and Flexible Keyjoint Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inwoo and Bae, Jinseok and Lim, Donggeun and Kim, Young Min},
  title     = {Motion Synthesis with Sparse and Flexible Keyjoint Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13203--13213}
}
EVOLVE: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Baek_2025_ICCV,
  author    = {Baek, Jong-Hyeon and Oh, Jiwon and Koh, Yeong Jun},
  title     = {{EVOLVE}: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11273--11282}
}
ARGUS: Hallucination and Omission Evaluation in Video-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawal_2025_ICCV,
  author    = {Rawal, Ruchit and Shirkavand, Reza and Huang, Heng and Somepalli, Gowthami and Goldstein, Tom},
  title     = {{ARGUS}: Hallucination and Omission Evaluation in Video-{LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20280--20290}
}
DiTaiListener: Controllable High Fidelity Listener Video Generation with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Siniukov_2025_ICCV, author = {Siniukov, Maksim and Chang, Di and Tran, Minh and Gong, Hongkun and Chaubey, Ashutosh and Soleymani, Mohammad}, title = {{DiTaiListener}: Controllable High Fidelity Listener Video Generation with Diffusion}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {11991--12001} }
Humans as a Calibration Pattern: Dynamic 3D Scene Reconstruction from Unsynchronized and Uncalibrated Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_ICCV, author = {Choi, Changwoon and Kim, Jeongjun and Cha, Geonho and Kim, Minkwan and Wee, Dongyoon and Kim, Young Min}, title = {Humans as a Calibration Pattern: Dynamic {3D} Scene Reconstruction from Unsynchronized and Uncalibrated Videos}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {6598--6608} }
ISP2HRNet: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuanlin and Xiong, Ruiqin and Zhao, Rui and Wang, Jin and Fan, Xiaopeng and Huang, Tiejun}, title = {{ISP2HRNet}: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {11547--11557} }
Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging-
[pdf]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Ying and Jiang, Jiaxi and Armani, Rayan and Hollidt, Dominik and Liao, Yi-Chi and Holz, Christian}, title = {Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {24910--24921} }
NormalLoc: Visual Localization on Textureless 3D Models using Surface Normals-
[pdf]
[supp]
[bibtex]@InProceedings{Abe_2025_ICCV, author = {Abe, Jiro and Nakano, Gaku and Ogura, Kazumine}, title = {{NormalLoc}: Visual Localization on Textureless {3D} Models using Surface Normals}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {25421--25430} }
Light-A-Video: Training-free Video Relighting via Progressive Light Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Yujie and Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Huang, Qidong and Li, Jinsong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Rao, Anyi and Wang, Jiaqi and Niu, Li}, title = {{Light-A-Video}: Training-free Video Relighting via Progressive Light Fusion}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {13315--13325} }
Decoupled Multi-Predictor Optimization for Inference-Efficient Model Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Liwei and Li, Shuaitengyuan and Ren, Dongwei and Wang, Qilong and Zhu, Pengfei and Hu, Qinghua}, title = {Decoupled Multi-Predictor Optimization for Inference-Efficient Model Tuning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {3628--3638} }
Deterministic Object Pose Confidence Region Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jinghao and Li, Zhang and Wang, Zi and Guan, Banglei and Shang, Yang and Yu, Qifeng}, title = {Deterministic Object Pose Confidence Region Estimation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {14866--14875} }
MedVSR: Medical Video Super-Resolution with Cross State-Space Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xinyu and Sun, Guolei and Wang, Cheng and Yuan, Yixuan and Konukoglu, Ender}, title = {{MedVSR}: Medical Video Super-Resolution with Cross State-Space Propagation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {11697--11707} }
MPBR: Multimodal Progressive Bidirectional Reasoning for Open-Set Fine-Grained Recognition-
[pdf]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Junfu and Jing, Peiguang and Zhu, Yu and Liu, Yu}, title = {{MPBR}: Multimodal Progressive Bidirectional Reasoning for Open-Set Fine-Grained Recognition}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {1282--1291} }
Domain-aware Category-level Geometry Learning Segmentation for 3D Point Clouds-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Pei and Li, Lingling and Jiao, Licheng and Shang, Ronghua and Liu, Fang and Wang, Shuang and Liu, Xu and Ma, Wenping}, title = {Domain-aware Category-level Geometry Learning Segmentation for {3D} Point Clouds}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {28324--28333} }
Di[M]O: Distilling Masked Diffusion Models into One-step Generator-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yuanzhi and Wang, Xi and Lathuili{\`e}re, St{\'e}phane and Kalogeiton, Vicky}, title = {{Di[M]O}: Distilling Masked Diffusion Models into One-step Generator}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {18606--18618} }
TerraMind: Large-Scale Generative Multimodality for Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jakubik_2025_ICCV, author = {Jakubik, Johannes and Yang, Felix and Blumenstiel, Benedikt and Scheurer, Erik and Sedona, Rocco and Maurogiovanni, Stefano and Bosmans, Jente and Dionelis, Nikolaos and Marsocci, Valerio and Kopp, Niklas and Ramachandran, Rahul and Fraccaro, Paolo and Brunschwiler, Thomas and Cavallaro, Gabriele and Bernabe-Moreno, Juan and Long{\'e}p{\'e}, Nicolas}, title = {{TerraMind}: Large-Scale Generative Multimodality for {Earth} Observation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {7383--7394} }
An Efficient Post-hoc Framework for Reducing Task Discrepancy of Text Encoders for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Byun_2025_ICCV, author = {Byun, Jaeseok and Jeong, Seokhyeon and Kim, Wonjae and Chun, Sanghyuk and Moon, Taesup}, title = {An Efficient Post-hoc Framework for Reducing Task Discrepancy of Text Encoders for Composed Image Retrieval}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {3895--3904} }
Q-Norm: Robust Representation Learning via Quality-Adaptive Normalization-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Lanning and Zhou, Ying and Gao, Fei and Li, Ziyun and Qiao, Maoying and Xu, Jinlan and Wang, Nannan}, title = {{Q-Norm}: Robust Representation Learning via Quality-Adaptive Normalization}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {13901--13911} }
VoiceCraft-Dub: Automated Video Dubbing with Neural Codec Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sung-Bin_2025_ICCV, author = {Sung-Bin, Kim and Choi, Jeongsoo and Peng, Puyuan and Chung, Joon Son and Oh, Tae-Hyun and Harwath, David}, title = {{VoiceCraft-Dub}: Automated Video Dubbing with Neural Codec Language Models}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {14623--14632} }
SUV: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings-
[pdf]
[supp]
[bibtex]@InProceedings{Lv_2025_ICCV, author = {Lv, Xiang and Shao, Mingwen and Meng, Lingzhuang and Liu, Chang and Wan, Yecong and Chen, Xinyuan}, title = {{SUV}: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {18357--18366} }
FonTS: Text Rendering With Typography and Style Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Wenda and Song, Yiren and Zhang, Dengming and Liu, Jiaming and Zou, Xingxing}, title = {{FonTS}: Text Rendering With Typography and Style Controls}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {18463--18474} }
Improving SAM for Camouflaged Object Detection via Dual Stream Adapters-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jiaming and Kong, Linghe and Chen, Guihai}, title = {Improving {SAM} for Camouflaged Object Detection via Dual Stream Adapters}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {21906--21916} }
DiSCO-3D : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in NeRF-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Petit_2025_ICCV, author = {Petit, Doriand and Bourgeois, Steve and Gay-Bellile, Vincent and Chabot, Florian and Barthe, Lo{\"\i}c}, title = {{DiSCO-3D} : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in {NeRF}}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {20043--20052} }
PRIMAL: Physically Reactive and Interactive Motor Model for Avatar Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yan and Feng, Yao and Cseke, Alp{\'a}r and Saini, Nitin and Bajandas, Nathan and Heron, Nicolas and Black, Michael J.}, title = {{PRIMAL}: Physically Reactive and Interactive Motor Model for Avatar Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {12725--12736} }
VCA: Video Curious Agent for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zeyuan and Chen, Delin and Yu, Xueyang and Shen, Maohao and Gan, Chuang}, title = {{VCA}: Video Curious Agent for Long Video Understanding}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {20168--20179} }
Self-Reinforcing Prototype Evolution with Dual-Knowledge Cooperation for Semi-Supervised Lifelong Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Kunlun and Zhuo, Fan and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan}, title = {Self-Reinforcing Prototype Evolution with Dual-Knowledge Cooperation for Semi-Supervised Lifelong Person Re-Identification}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {3564--3574} }
What to Distill? Fast Knowledge Distillation with Adaptive Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2025_ICCV, author = {Chae, Byungchul and Heo, Seonyeong}, title = {What to Distill? {Fast} Knowledge Distillation with Adaptive Sampling}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {2407--2416} }
Revisiting Pool-based Prompt Learning for Few-shot Class-incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Yongwei and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Revisiting Pool-based Prompt Learning for Few-shot Class-incremental Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {1303--1313} }
HOMO-Feature: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Chenzhong and Li, Wei and Weng, Desheng}, title = {{HOMO-Feature}: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {10538--10548} }
ForeSight: Multi-View Streaming Joint Object Detection and Trajectory Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papais_2025_ICCV, author = {Papais, Sandro and Wang, Letian and Cheong, Brian and Waslander, Steven L.}, title = {{ForeSight}: Multi-View Streaming Joint Object Detection and Trajectory Forecasting}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {25474--25484} }
SG-LDM: Semantic-Guided LiDAR Generation via Latent-Aligned Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Zhengkang and Li, Zizhao and Khodabandeh, Amir and Khoshelham, Kourosh}, title = {{SG-LDM}: Semantic-Guided {LiDAR} Generation via Latent-Aligned Diffusion}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {24965--24976} }
Dynamic Multi-Layer Null Space Projection for Vision-Language Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Borui and Wang, Lei and Wu, Zhiping and Feng, Tao and Li, Yawen and Gao, Yang and Li, Wenbin}, title = {Dynamic Multi-Layer Null Space Projection for Vision-Language Continual Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {2077--2086} }
AFUNet: Cross-Iterative Alignment-Fusion Synergy for HDR Reconstruction via Deep Unfolding Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xinyue and Ni, Zhangkai and Yang, Wenhan}, title = {{AFUNet}: Cross-Iterative Alignment-Fusion Synergy for {HDR} Reconstruction via Deep Unfolding Paradigm}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {10666--10675} }
DiT4SR: Taming Diffusion Transformer for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Zheng-Peng and Zhang, Jiawei and Jin, Xin and Zhang, Ziheng and Xiong, Zheng and Zou, Dongqing and Ren, Jimmy S. and Guo, Chunle and Li, Chongyi}, title = {{DiT4SR}: Taming Diffusion Transformer for Real-World Image Super-Resolution}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {18948--18958} }
Balanced Sharpness-Aware Minimization for Imbalanced Regression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yahao and Wang, Qin and Duan, Lixin and Li, Wen}, title = {Balanced Sharpness-Aware Minimization for Imbalanced Regression}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {6242--6251} }
Dynamic-DINO: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yehao and Weng, Minghe and Xiao, Zekang and Jiang, Rui and Su, Wei and Zheng, Guangcong and Lu, Ping and Li, Xi}, title = {{Dynamic-DINO}: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {20847--20856} }
Video Individual Counting for Moving Drones-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Yaowu and Wan, Jia and Han, Tao and Chan, Antoni B. and Ma, Andy J.}, title = {Video Individual Counting for Moving Drones}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {12284--12293} }
BridgeDepth: Bridging Monocular and Stereo Reasoning with Latent Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Tongfan and Guo, Jiaxin and Wang, Chen and Liu, Yun-Hui}, title = {{BridgeDepth}: Bridging Monocular and Stereo Reasoning with Latent Alignment}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {27681--27691} }
SuperEvent: Cross-Modal Learning of Event-based Keypoint Detection for SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Burkhardt_2025_ICCV, author = {Burkhardt, Yannick and Schaefer, Simon and Leutenegger, Stefan}, title = {{SuperEvent}: Cross-Modal Learning of Event-based Keypoint Detection for {SLAM}}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {8918--8928} }
ViLLa: Video Reasoning Segmentation with Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Rongkun and Qi, Lu and Chen, Xi and Wang, Yi and Wang, Kun and Zhao, Hengshuang}, title = {{ViLLa}: Video Reasoning Segmentation with Large Language Model}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {23667--23677} }
Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yandan and Guo, Chenqi and Ma, Yinglong and Chen, Jiangyan and Gao, Yuan and Dong, Weiming}, title = {Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {10162--10172} }
PRVQL: Progressive Knowledge-guided Refinement for Robust Egocentric Visual Query Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Bing and Feng, Yunhe and Tian, Yapeng and Liang, James Chenhao and Lin, Yuewei and Huang, Yan and Fan, Heng}, title = {{PRVQL}: Progressive Knowledge-guided Refinement for Robust Egocentric Visual Query Localization}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {5156--5165} }
ARIG: Autoregressive Interactive Head Generation for Real-time Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Ying and Liu, Xi and Zhen, Cheng and Yan, Pengfei and Wei, Xiaoming}, title = {{ARIG}: Autoregressive Interactive Head Generation for Real-time Conversations}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {12956--12965} }
RARE: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chengyu and Huang, Jin and Chen, Honghua and Wei, Mingqiang}, title = {{RARE}: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {26549--26558} }
Bayesian-Inspired Space-Time Superpixels-
[pdf]
[supp]
[bibtex]@InProceedings{Gauen_2025_ICCV, author = {Gauen, Kent and Chan, Stanley}, title = {Bayesian-Inspired Space-Time Superpixels}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {5382--5391} }
FEVER-OOD: Free Energy Vulnerability Elimination for Robust Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Isaac-Medina_2025_ICCV, author = {Isaac-Medina, Brian K. S. and Che, Mauricio and Gaus, Yona Falinie A. and Akcay, Samet and Breckon, Toby P.}, title = {{FEVER-OOD}: Free Energy Vulnerability Elimination for Robust Out-of-Distribution Detection}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4529--4538} }
TRNAS: A Training-Free Robust Neural Architecture Search-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yeming and Zhu, Qingling and Luo, Jianping and Wong, Ka-Chun and Lin, Qiuzhen and Li, Jianqiang}, title = {{TRNAS}: A Training-Free Robust Neural Architecture Search}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {2336--2345} }
Hierarchical Divide-and-Conquer Grouping for Classification Adaptation of Pre-Trained Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Ziqian and Yu, Yunlong and Tong, Qinyue and Liu, Jun}, title = {Hierarchical Divide-and-Conquer Grouping for Classification Adaptation of Pre-Trained Models}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {3575--3584} }
Text-IRSTD: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes-
[pdf]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Feng and Zheng, Shuyuan and Qiu, Zhaobing and Liu, Huanxian and Bai, Huanxin and Chen, Liqiong}, title = {{Text-IRSTD}: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {10635--10644} }
IntrinsicControlNet: Cross-distribution Image Generation with Real and Unreal-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jiayuan and Xie, Rengan and Xie, Zixuan and Wu, Zhizhen and Xi, Dianbing and Ye, Qi and Wang, Rui and Bao, Hujun and Huo, Yuchi}, title = {{IntrinsicControlNet}: Cross-distribution Image Generation with Real and Unreal}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {27315--27325} }
Hypergraph Clustering Network with Partial Attribute Imputation-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Qianqian and Zhao, Bowen and Ding, Zhengming and Feng, Wei and Gao, Quanxue}, title = {Hypergraph Clustering Network with Partial Attribute Imputation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {2697--2706} }
Flash-VStream: Efficient Real-Time Understanding for Long Video Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Haoji and Wang, Yiqin and Tang, Yansong and Liu, Yong and Feng, Jiashi and Jin, Xiaojie}, title = {{Flash-VStream}: Efficient Real-Time Understanding for Long Video Streams}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {21059--21069} }
RoboTron-Drive: All-in-One Large Multimodal Model for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Zhijian and Feng, Chengjian and Yan, Feng and Xiao, Baihui and Jie, Zequn and Zhong, Yujie and Liang, Xiaodan and Ma, Lin}, title = {{RoboTron-Drive}: All-in-One Large Multimodal Model for Autonomous Driving}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {8011--8021} }
Dual-Process Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Grace and Granskog, Jonathan and Holynski, Aleksander and Darrell, Trevor}, title = {Dual-Process Image Generation}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {17972--17983} }
GroundFlow: A Plug-in Module for Temporal Reasoning on 3D Point Cloud Sequential Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Zijun and He, Shuting and Tan, Cheston and Wen, Bihan}, title = {{GroundFlow}: A Plug-in Module for Temporal Reasoning on {3D} Point Cloud Sequential Grounding}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {28774--28784} }
Long-Tailed Classification with Multi-Granularity Semantics-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yuting and Yang, Liu and Wang, Yu}, title = {Long-Tailed Classification with Multi-Granularity Semantics}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4285--4294} }
DAA*: Deep Angular A Star for Image-based Path Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zhiwei}, title = {{DAA*}: Deep Angular {A} Star for Image-based Path Planning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {25284--25293} }
TrackVerse: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Yibing and Church, Samuel and Suciu, Victor and Lin, Jinhong and Wu, Cheng-En and Morgado, Pedro}, title = {{TrackVerse}: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {11153--11163} }
Perspective-Invariant 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Ao and Kong, Lingdong and Lu, Dongyue and Liu, Youquan and Fang, Jian and Zhao, Huaici and Ooi, Wei Tsang}, title = {Perspective-Invariant {3D} Object Detection}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {27725--27738} }
Long-LRM: Long-sequence Large Reconstruction Model for Wide-coverage Gaussian Splats-
[pdf]
[supp]
[bibtex]@InProceedings{Ziwen_2025_ICCV, author = {Ziwen, Chen and Tan, Hao and Zhang, Kai and Bi, Sai and Luan, Fujun and Hong, Yicong and Fuxin, Li and Xu, Zexiang}, title = {{Long-LRM}: Long-sequence Large Reconstruction Model for Wide-coverage {Gaussian} Splats}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4349--4359} }
Unlearning the Noisy Correspondence Makes CLIP More Robust-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Haochen and Wang, Alex Jinpeng and Ye, Peijun and Liu, Fangming}, title = {Unlearning the Noisy Correspondence Makes {CLIP} More Robust}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4518--4528} }
HumanSAM: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Chang and Ye, Yunfan and Zhang, Fan and Zhou, Qingyang and Luo, Yuchuan and Cai, Zhiping}, title = {{HumanSAM}: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {14028--14038} }
An Information-Theoretic Regularizer for Lossy Neural Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yingwen and Wang, Meng and Sheng, Xihua and Chen, Peilin and Li, Junru and Zhang, Li and Wang, Shiqi}, title = {An Information-Theoretic Regularizer for Lossy Neural Image Compression}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {15573--15582} }
ReTracker: Exploring Image Matching for Robust Online Any Point Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Dongli and He, Xingyi and Peng, Sida and Gong, Yiqing and Zhu, Xing and Sun, Jiaming and Hu, Ruizhen and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei}, title = {{ReTracker}: Exploring Image Matching for Robust Online Any Point Tracking}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4306--4316} }
DC-AE 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Junyu and Zou, Dongyun and He, Wenkun and Chen, Junsong and Xie, Enze and Han, Song and Cai, Han}, title = {{DC-AE} 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {19628--19637} }
V2M4: 4D Mesh Animation Reconstruction from a Single Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter}, title = {{V2M4}: {4D} Mesh Animation Reconstruction from a Single Monocular Video}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {11643--11653} }
MixA: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge-
[pdf]
[supp]
[bibtex]@InProceedings{Ahmed_2025_ICCV, author = {Ahmed, Sabbir and Li, Jingtao and Zhuang, Weiming and Chen, Chen and Lyu, Lingjuan}, title = {{MixA}: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {21187--21196} }
MUSE: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Fei and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Fu, Huiyuan}, title = {{MUSE}: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {15885--15895} }
OpenSubstance: A High-quality Measured Dataset of Multi-View and -Lighting Images and Shapes-
[pdf]
[bibtex]@InProceedings{Pei_2025_ICCV, author = {Pei, Fan and Bai, Jinchen and Feng, Xiang and Bi, Zoubin and Zhou, Kun and Wu, Hongzhi}, title = {{OpenSubstance}: A High-quality Measured Dataset of Multi-View and -Lighting Images and Shapes}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {5221--5231} }
Overcoming Dual Drift for Continual Long-Tailed Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Feifei and Wang, Zhihao and Zhang, Xi and Xu, Changsheng}, title = {Overcoming Dual Drift for Continual Long-Tailed Visual Question Answering}, booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})}, month = oct, year = {2025}, pages = {4413--4423} }
LIRA: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhang and Yang, Biao and Liu, Qiang and Zhang, Shuo and Ma, Zhiyin and Yin, Liang and Deng, Linger and Sun, Yabo and Liu, Yuliang and Bai, Xiang}, title = {LIRA: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24056-24067} }
GARF: Learning Generalizable 3D Reassembly for Real-World Fractures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Sihang and Jiang, Zeyu and Chen, Grace and Xu, Chenyang and Tan, Siqi and Wang, Xue and Fang, Irving and Zyskowski, Kristof and McPherron, Shannon P. and Iovita, Radu and Feng, Chen and Zhang, Jing}, title = {GARF: Learning Generalizable 3D Reassembly for Real-World Fractures}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5711-5721} }
Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining-
[pdf]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Qi and Liu, Kaiqi and Liu, Nian and Cholakkal, Hisham and Anwer, Rao Muhammad and Li, Wenbin and Gao, Yang}, title = {Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21429-21439} }
Exploring the Adversarial Vulnerabilities of Vision-Language-Action Models in Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Taowen and Han, Cheng and Liang, James and Yang, Wenhao and Liu, Dongfang and Zhang, Luna Xinyu and Wang, Qifan and Luo, Jiebo and Tang, Ruixiang}, title = {Exploring the Adversarial Vulnerabilities of Vision-Language-Action Models in Robotics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6948-6958} }
Scaling Language-Free Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, David and Tong, Shengbang and Zhu, Jiachen and Sinha, Koustuv and Liu, Zhuang and Chen, Xinlei and Rabbat, Michael and Ballas, Nicolas and LeCun, Yann and Bar, Amir and Xie, Saining}, title = {Scaling Language-Free Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {370-382} }
HUG: Hierarchical Urban Gaussian Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Mai and Wang, Zhongtao and Au, Huishan and Li, Yilong and Cao, Xizhe and Pan, Chengwei and Chen, Yisong and Wang, Guoping}, title = {HUG: Hierarchical Urban Gaussian Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28839-28848} }
ESSENTIAL: Episodic and Semantic Memory Integration for Video Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jongseo and Bae, Kyungho and Min, Kyle and Park, Gyeong-Moon and Choi, Jinwoo}, title = {ESSENTIAL: Episodic and Semantic Memory Integration for Video Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17546-17556} }
Move to Understand a 3D Scene: Bridging Visual Grounding and Exploration for Efficient and Versatile Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Ziyu and Wang, Xilin and Li, Yixuan and Zhang, Zhuofan and Ma, Xiaojian and Chen, Yixin and Jia, Baoxiong and Liang, Wei and Yu, Qian and Deng, Zhidong and Huang, Siyuan and Li, Qing}, title = {Move to Understand a 3D Scene: Bridging Visual Grounding and Exploration for Efficient and Versatile Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8120-8132} }
Exploring The Visual Feature Space for Multimodal Neural Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Weihao and Oztireli, Cengiz}, title = {Exploring The Visual Feature Space for Multimodal Neural Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4370-4379} }
INS-MMBench: A Comprehensive Benchmark for Evaluating LVLMs' Performance in Insurance-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Chenwei and Lyu, Hanjia and Xu, Xian and Luo, Jiebo}, title = {INS-MMBench: A Comprehensive Benchmark for Evaluating LVLMs' Performance in Insurance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9036-9047} }
Continual Personalization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Yu-Chien and Chen, Jr-Jen and Huang, Chi-Pin and Lin, Ci-Siang and Wu, Meng-Lin and Wang, Yu-Chiang Frank}, title = {Continual Personalization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15511-15520} }
Creation-MMBench: Assessing Context-Aware Creative Intelligence in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Xinyu and Chen, Zhijian and Lan, Kai and Ma, Lixin and Ding, Shengyuan and Liang, Yingji and Zhao, Xiangyu and Wen, Farong and Zhang, Zicheng and Zhang, Guofeng and Duan, Haodong and Chen, Kai and Lin, Dahua}, title = {Creation-MMBench: Assessing Context-Aware Creative Intelligence in MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {447-456} }
Seam360GS: Seamless 360° Gaussian Splatting from Real-World Omnidirectional Images-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Changha and Cho, Woong Oh and Kim, Seon Joo}, title = {Seam360GS: Seamless {$360^\circ$} Gaussian Splatting from Real-World Omnidirectional Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28970-28979} }
Boosting Adversarial Transferability via Residual Perturbation Attack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Jinjia and Tao, Zeze and Wang, Huibing and Wang, Meng and Wang, Yang}, title = {Boosting Adversarial Transferability via Residual Perturbation Attack}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1261-1270} }
LDPose: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Li, Lincheng and Yu, Xin}, title = {LDPose: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9865-9875} }
Talking to DINO: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barsellotti_2025_ICCV, author = {Barsellotti, Luca and Bianchi, Lorenzo and Messina, Nicola and Carrara, Fabio and Cornia, Marcella and Baraldi, Lorenzo and Falchi, Fabrizio and Cucchiara, Rita}, title = {Talking to DINO: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22025-22035} }
Language-Driven Multi-Label Zero-Shot Learning with Semantic Granularity-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Shouwen and Wan, Qian and Gao, Junbin and Zeng, Zhigang}, title = {Language-Driven Multi-Label Zero-Shot Learning with Semantic Granularity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1968-1978} }
Beyond Perspective: Neural 360-Degree Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Regensky_2025_ICCV, author = {Regensky, Andy and Windsheimer, Marc and Brand, Fabian and Kaup, Andre}, title = {Beyond Perspective: Neural 360-Degree Video Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16143-16153} }
Differentiable Room Acoustic Rendering with Multi-View Vision Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Derong and Gao, Ruohan}, title = {Differentiable Room Acoustic Rendering with Multi-View Vision Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {37-47} }
An Inversion-based Measure of Memorization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Zhe and Li, Qingming and Zhang, Xuhong and Du, Tianyu and Lin, Ruixiao and Wang, Zonghui and Ji, Shouling and Chen, Wenzhi}, title = {An Inversion-based Measure of Memorization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16959-16969} }
AgroBench: Vision-Language Model Benchmark in Agriculture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shinoda_2025_ICCV, author = {Shinoda, Risa and Inoue, Nakamasa and Kataoka, Hirokatsu and Onishi, Masaki and Ushiku, Yoshitaka}, title = {AgroBench: Vision-Language Model Benchmark in Agriculture}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7634-7644} }
ROAR: Reducing Inversion Error in Generative Image Watermarking-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hanyi and Fang, Han and Wang, Shi-Lin and Chang, Ee-Chien}, title = {ROAR: Reducing Inversion Error in Generative Image Watermarking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19742-19751} }
Adding Additional Control to One-Step Diffusion with Joint Distribution Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Yihong and Hu, Tianyang and Song, Yifan and Sun, Jiacheng and Li, Zhenguo and Tang, Jing}, title = {Adding Additional Control to One-Step Diffusion with Joint Distribution Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4009-4018} }
TimeBooth: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Zepeng and Liu, Zhulin and Zhang, Zongyan and Zhang, Tong and Chen, C. L. Philip}, title = {TimeBooth: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12147-12157} }
Flexi-FSCIL: Adaptive Knowledge Retention for Breaking the Stability-Plasticity Dilemma in Few-Shot Class-Incremental Learning-
[pdf]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Wufei and Wang, Yalin and Liu, Chenliang and Jiang, Zhaohui and Yang, Xue}, title = {Flexi-FSCIL: Adaptive Knowledge Retention for Breaking the Stability-Plasticity Dilemma in Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2451-2460} }
Learning Beyond Still Frames: Scaling Vision-Language Models with Video-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu}, title = {Learning Beyond Still Frames: Scaling Vision-Language Models with Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22425-22435} }
MultiADS: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sadikaj_2025_ICCV, author = {Sadikaj, Ylli and Zhou, Hongkuan and Halilaj, Lavdim and Schmid, Stefan and Staab, Steffen and Plant, Claudia}, title = {MultiADS: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22978-22988} }
SEGS-SLAM: Structure-enhanced 3D Gaussian Splatting SLAM with Appearance Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Tianci and Liu, Zhiang and Fang, Yongchun}, title = {SEGS-SLAM: Structure-enhanced 3D Gaussian Splatting SLAM with Appearance Embedding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28103-28113} }
Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Fan and Wang, Xuanbin and Wang, Xuan and Zhang, Zhaoxiang and Xu, Yuelei}, title = {Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24255-24265} }
Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaoyu and Pan, Weihong and Xiang, Xiaojun and Zhai, Hongjia and Zhou, Liyang and Jiang, Hanqing and Zhang, Guofeng}, title = {Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26923-26932} }
How To Make Your Cell Tracker Say "I dunno!"-
[pdf]
[supp]
[bibtex]@InProceedings{Paul_2025_ICCV, author = {Paul, Richard D. and Seiffarth, Johannes and R{\"u}gamer, David and N{\"o}h, Katharina and Scharr, Hanno}, title = {How To Make Your Cell Tracker Say ``I dunno!''}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6914-6923} }
Training-free and Adaptive Sparse Attention for Efficient Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Yifei and Ling, Suhan and Fu, Fangcheng and Wang, Yujie and Li, Huixia and Xiao, Xuefeng and Cui, Bin}, title = {Training-free and Adaptive Sparse Attention for Efficient Long Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15982-15993} }
Super Resolved Imaging with Adaptive Optics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Swanson_2025_ICCV, author = {Swanson, Robin and Lin, Esther Y. H. and Lamb, Masen and Sivanandam, Suresh and Kutulakos, Kiriakos N.}, title = {Super Resolved Imaging with Adaptive Optics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29142-29152} }
BillBoard Splatting (BBSplat): Learnable Textured Primitives for Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Svitov_2025_ICCV, author = {Svitov, David and Morerio, Pietro and Agapito, Lourdes and Del Bue, Alessio}, title = {BillBoard Splatting (BBSplat): Learnable Textured Primitives for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25029-25039} }
SDFit: 3D Object Pose and Shape by Fitting a Morphable SDF to a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Antic_2025_ICCV, author = {Anti{\'c}, Dimitrije and Paschalidis, Georgios and Tripathi, Shashank and Gevers, Theo and Dwivedi, Sai Kumar and Tzionas, Dimitrios}, title = {SDFit: 3D Object Pose and Shape by Fitting a Morphable SDF to a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9616-9626} }
VRM: Knowledge Distillation via Virtual Relation Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weijia and Xie, Fei and Cai, Weidong and Ma, Chao}, title = {VRM: Knowledge Distillation via Virtual Relation Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2707-2717} }
SC-Lane: Slope-aware and Consistent Road Height Estimation Framework for 3D Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Chaesong and Seo, Eunbin and Hwang, Jihyeon and Lim, Jongwoo}, title = {SC-Lane: Slope-aware and Consistent Road Height Estimation Framework for 3D Lane Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28407-28416} }
Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Haoming and Huang, Tsung-Wei and Gehlot, Shiv and Feng, Brandon Y. and Shah, Sachin and Su, Guan-Ming and Metzler, Christopher}, title = {Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18207-18217} }
ZIUM: Zero-Shot Intent-Aware Adversarial Attack on Unlearned Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yook_2025_ICCV, author = {Yook, Hyun Jun and Jhun, Ga San and Cho, Jae Hyun and Jeon, Min and Kim, Donghyun and Kim, Tae Hyung and Lee, Youn Kyu}, title = {ZIUM: Zero-Shot Intent-Aware Adversarial Attack on Unlearned Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3926-3935} }
Beyond Next-Token: Next-X Prediction for Autoregressive Visual Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh}, title = {Beyond Next-Token: Next-X Prediction for Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15781-15791} }
GECO: Geometrically Consistent Embedding with Lightspeed Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hartwig_2025_ICCV, author = {Hartwig, Regine and Muhle, Dominik and Marin, Riccardo and Cremers, Daniel}, title = {GECO: Geometrically Consistent Embedding with Lightspeed Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9309-9319} }
Marigold-DC: Zero-Shot Monocular Depth Completion with Guided Diffusion-
[pdf]
[bibtex]@InProceedings{Viola_2025_ICCV, author = {Viola, Massimiliano and Qu, Kevin and Metzger, Nando and Ke, Bingxin and Becker, Alexander and Schindler, Konrad and Obukhov, Anton}, title = {Marigold-DC: Zero-Shot Monocular Depth Completion with Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5359-5370} }
MedSegFactory: Text-Guided Generation of Medical Image-Mask Pairs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mao_2025_ICCV, author = {Mao, Jiawei and Wang, Yuhan and Tang, Yucheng and Xu, Daguang and Wang, Kang and Yang, Yang and Zhou, Zongwei and Zhou, Yuyin}, title = {MedSegFactory: Text-Guided Generation of Medical Image-Mask Pairs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21525-21535} }
TurboTrain: Towards Efficient and Balanced Multi-Task Learning for Multi-Agent Perception and Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zewei and Zhao, Seth Z. and Cai, Tianhui and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi}, title = {TurboTrain: Towards Efficient and Balanced Multi-Task Learning for Multi-Agent Perception and Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4391-4402} }
FiVE-Bench: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Minghan and Xie, Chenxi and Wu, Yichen and Zhang, Lei and Wang, Mengyu}, title = {FiVE-Bench: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16672-16681} }
ReAL-AD: Towards Human-Like Reasoning in End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yuhang and Tu, Jiadong and Ma, Yuexin and Zhu, Xinge}, title = {ReAL-AD: Towards Human-Like Reasoning in End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27783-27793} }
ReME: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xuan_2025_ICCV, author = {Xuan, Xiwei and Deng, Ziquan and Ma, Kwan-Liu}, title = {ReME: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20954-20965} }
Sparse Fine-Tuning of Transformers for Generative Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Wei and Yu, Jingxi and Miao, Zichen and Qiu, Qiang}, title = {Sparse Fine-Tuning of Transformers for Generative Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18703-18713} }
Unlocking Constraints: Source-Free Occlusion-Aware Seamless Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Yihong and Zhang, Jiaming and Zheng, Xu and Shi, Hao and Peng, Kunyu and Liu, Hang and Yang, Kailun and Zhang, Hui}, title = {Unlocking Constraints: Source-Free Occlusion-Aware Seamless Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8961-8972} }
A Visual Leap in CLIP Compositionality Reasoning through Generation of Counterfactual Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Zexi and Huang, Chuanwei and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie}, title = {A Visual Leap in CLIP Compositionality Reasoning through Generation of Counterfactual Sets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23498-23507} }
Boosting Adversarial Transferability via Negative Hessian Trace Regularization-
[pdf]
[supp]
[bibtex]@InProceedings{Long_2025_ICCV, author = {Long, Yunfei and Tian, Zilin and Zhang, Liguo and Xu, Huosheng}, title = {Boosting Adversarial Transferability via Negative Hessian Trace Regularization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2386-2395} }
AutoOcc: Automatic Open-Ended Semantic Occupancy Annotation via Vision-Language Guided Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xiaoyu and Wang, Jingqi and Wang, Yongtao and Wei, Yufei and Dong, Nan and Yang, Ming-Hsuan}, title = {AutoOcc: Automatic Open-Ended Semantic Occupancy Annotation via Vision-Language Guided Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3367-3377} }
Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models-
[pdf]
[bibtex]@InProceedings{Fei_2025_ICCV, author = {Fei, Jianwei and Dai, Yunshu and Yu, Peipeng and Kong, Zhe and Zhou, Jiantao and Xia, Zhihua}, title = {Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15025-15034} }
Improving Noise Efficiency in Privacy-preserving Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Runkai and Dasu, Vishnu Asutosh and Wang, Yinong Oliver and Wang, Haohan and De La Torre, Fernando}, title = {Improving Noise Efficiency in Privacy-preserving Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4838-4847} }
Parameter-Efficient Adaptation of Geospatial Foundation Models through Embedding Deflection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thoreau_2025_ICCV, author = {Thoreau, Romain and Marsocci, Valerio and Derksen, Dawa}, title = {Parameter-Efficient Adaptation of Geospatial Foundation Models through Embedding Deflection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9594-9604} }
UniFuse: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Dayong and Zhang, Yafei and Li, Huafeng and Li, Jinxing and Liu, Yu}, title = {UniFuse: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14238-14247} }
Coupling the Generator with Teacher for Effective Data-Free Knowledge Distillation-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xu and Li, Yang and Han, Yahong and Xu, Guangquan and Shen, Jialie}, title = {Coupling the Generator with Teacher for Effective Data-Free Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2152-2160} }
MOSCATO: Predicting Multiple Object State Change Through Actions-
[pdf]
[supp]
[bibtex]@InProceedings{Zameni_2025_ICCV, author = {Zameni, Parnian and Shen, Yuhan and Elhamifar, Ehsan}, title = {MOSCATO: Predicting Multiple Object State Change Through Actions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11600-11611} }
Harnessing Input-Adaptive Inference for Efficient VLN-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Dongwoo and Perincherry, Akhil and Coalson, Zachary and Gabriel, Aiden and Lee, Stefan and Hong, Sanghyun}, title = {Harnessing Input-Adaptive Inference for Efficient VLN}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8219-8229} }
LoftUp: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Haiwen and Chen, Anpei and Havrylov, Volodymyr and Geiger, Andreas and Zhang, Dan}, title = {LoftUp: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9913-9923} }
EMatch: A Unified Framework for Event-based Optical Flow and Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Pengjie and Zhu, Lin and Wang, Xiao and Wang, Lizhi and Huang, Hua}, title = {EMatch: A Unified Framework for Event-based Optical Flow and Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5845-5855} }
Superpowering Open-Vocabulary Object Detectors for X-ray Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia-Fernandez_2025_ICCV, author = {Garcia-Fernandez, Pablo and Vaquero, Lorenzo and Liu, Mingxuan and Xue, Feng and Cores, Daniel and Sebe, Nicu and Mucientes, Manuel and Ricci, Elisa}, title = {Superpowering Open-Vocabulary Object Detectors for X-ray Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20770-20779} }
LVBench: An Extreme Long Video Understanding Benchmark-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Weihan and He, Zehai and Hong, Wenyi and Cheng, Yean and Zhang, Xiaohan and Qi, Ji and Ding, Ming and Gu, Xiaotao and Huang, Shiyu and Xu, Bin and Dong, Yuxiao and Tang, Jie}, title = {LVBench: An Extreme Long Video Understanding Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22958-22967} }
Scalable Image Tokenization with Index Backpropagation Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Fengyuan and Luo, Zhuoyan and Ge, Yixiao and Yang, Yujiu and Shan, Ying and Wang, Limin}, title = {Scalable Image Tokenization with Index Backpropagation Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16037-16046} }
InsViE-1M: Effective Instruction-based Video Editing with Elaborate Dataset Construction-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yuhui and Chen, Liyi and Li, Ruibin and Wang, Shihao and Xie, Chenxi and Zhang, Lei}, title = {InsViE-1M: Effective Instruction-based Video Editing with Elaborate Dataset Construction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16692-16701} }
SpatialCrafter: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Songchun and Xu, Huiyao and Guo, Sitong and Xie, Zhongwei and Bao, Hujun and Xu, Weiwei and Zou, Changqing}, title = {SpatialCrafter: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27794-27805} }
Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hyun_2025_ICCV, author = {Hyun, Jeongseok and Hwang, Sukjun and Han, Su Ho and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Lee, Joon-Young and Kim, Seon Joo and Shim, Minho}, title = {Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23990-24000} }
ONLY: One-Layer Intervention Sufficiently Mitigates Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2025_ICCV, author = {Wan, Zifu and Zhang, Ce and Yong, Silong and Ma, Martin Q. and Stepputtis, Simon and Morency, Louis-Philippe and Ramanan, Deva and Sycara, Katia and Xie, Yaqi}, title = {ONLY: One-Layer Intervention Sufficiently Mitigates Hallucinations in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3225-3234} }
Inverse Image-Based Rendering for Light Field Generation from Single Images-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Hyunjun and Jeon, Hae-Gon}, title = {Inverse Image-Based Rendering for Light Field Generation from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24739-24749} }
Morph: A Motion-free Physics Optimization Framework for Human Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhuo and Luo, Mingshuang and Hou, Ruibing and Zhao, Xin and Liu, Hao and Chang, Hong and Liu, Zimo and Li, Chen}, title = {Morph: A Motion-free Physics Optimization Framework for Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14580-14589} }
Generalized and Efficient 2D Gaussian Splatting for Arbitrary-scale Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Du and Chen, Liyi and Zhang, Zhengqiang and Zhang, Lei}, title = {Generalized and Efficient 2D Gaussian Splatting for Arbitrary-scale Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26435-26445} }
Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Hongwei and Pan, Dongyu and Xia, Qiming and Wu, Hai and Wang, Cheng and Shen, Siqi and Wen, Chenglu}, title = {Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19947-19956} }
ProbMED: A Probabilistic Framework for Medical Multimodal Binding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Yuan and Kim, Sangwook and You, Jianzhong and McIntosh, Chris}, title = {ProbMED: A Probabilistic Framework for Medical Multimodal Binding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20157-20167} }
Kestrel: 3D Multimodal LLM for Part-Aware Grounded Description-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmed_2025_ICCV, author = {Ahmed, Mahmoud and Fei, Junjie and Ding, Jian and Bakr, Eslam Mohamed and Elhoseiny, Mohamed}, title = {Kestrel: 3D Multimodal LLM for Part-Aware Grounded Description}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8973-8983} }
Precise Action-to-Video Generation Through Visual Action Prompts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuang and Wen, Chao and Guo, Haoyu and Peng, Sida and Qin, Minghan and Bao, Hujun and Zhou, Xiaowei and Hu, Ruizhen}, title = {Precise Action-to-Video Generation Through Visual Action Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12713-12724} }
QuickSplat: Fast 3D Surface Reconstruction via Learned Gaussian Initialization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yueh-Cheng and H\"ollein, Lukas and Nie{\ss}ner, Matthias and Dai, Angela}, title = {QuickSplat: Fast 3D Surface Reconstruction via Learned Gaussian Initialization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27851-27861} }
EquiCaps: Predictor-Free Pose-Aware Pre-Trained Capsule Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Konstantinou_2025_ICCV, author = {Konstantinou, Athinoulla and Leontidis, Georgios and Thota, Mamatha and Durrant, Aiden}, title = {EquiCaps: Predictor-Free Pose-Aware Pre-Trained Capsule Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7947-7957} }
Feature Decomposition-Recomposition in Large Vision-Language Model for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Zongyao and Kan, Meina and Shan, Shiguang and Chen, Xilin}, title = {Feature Decomposition-Recomposition in Large Vision-Language Model for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3153-3162} }
Taming Flow Matching with Unbalanced Optimal Transport into Fast Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Zihan and Zhong, Yu and Deng, Liang-Jian}, title = {Taming Flow Matching with Unbalanced Optimal Transport into Fast Pansharpening}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2803-2813} }
Learning to Unlearn while Retaining: Combating Gradient Conflicts in Machine Unlearning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patel_2025_ICCV, author = {Patel, Gaurav and Qiu, Qiang}, title = {Learning to Unlearn while Retaining: Combating Gradient Conflicts in Machine Unlearning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4211-4221} }
Scaling and Taming Adversarial Training with Synthetic Data-
[pdf]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Juntao and Huang, Xianting and Chen, Yu and Pang, Shuai and Wang, Ke}, title = {Scaling and Taming Adversarial Training with Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2951-2960} }
FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yabo and Zhou, Xinpeng and Zeng, Yihan and Xu, Hang and Li, Hui and Zuo, Wangmeng}, title = {FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18121-18131} }
MetaScope: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Wuyang and Pan, Wentao and Liu, Xiaoyuan and Luo, Zhendong and Li, Chenxin and Liu, Hengyu and Tsai, Din Ping and Chen, Mu Ku and Yuan, Yixuan}, title = {MetaScope: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25938-25950} }
Removing Cost Volumes from Optical Flow Estimators-
[pdf]
[supp]
[bibtex]@InProceedings{Kiefhaber_2025_ICCV, author = {Kiefhaber, Simon and Roth, Stefan and Schaub-Meyer, Simone}, title = {Removing Cost Volumes from Optical Flow Estimators}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {79-89} }
SparseRecon: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Liang and Zhang, Xu and Song, Haichuan and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {SparseRecon: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28514-28524} }
LMM4LMM: Benchmarking and Evaluating Large-multimodal Image Generation with LMMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiarui and Duan, Huiyu and Zhao, Yu and Wang, Juntong and Zhai, Guangtao and Min, Xiongkuo}, title = {LMM4LMM: Benchmarking and Evaluating Large-multimodal Image Generation with LMMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17312-17323} }
Leveraging the Power of MLLMs for Gloss-Free Sign Language Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jungeun and Jeon, Hyeongwoo and Bae, Jongseong and Kim, Ha Young}, title = {Leveraging the Power of MLLMs for Gloss-Free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21048-21058} }
Cross-modal Ship Re-Identification via Optical and SAR Imagery: A Novel Dataset and Method-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Han and Li, Shengyang and Yang, Jian and Liu, Yuxuan and Lv, Yixuan and Zhou, Zhuang}, title = {Cross-modal Ship Re-Identification via Optical and SAR Imagery: A Novel Dataset and Method}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7873-7883} }
Hybrid-grained Feature Aggregation with Coarse-to-fine Language Guidance for Self-supervised Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wenyao and Liu, Hongsi and Li, Bohan and He, Jiawei and Qi, Zekun and Wang, Yunnan and Zhao, Shengyang and Yu, Xinqiang and Zeng, Wenjun and Jin, Xin}, title = {Hybrid-grained Feature Aggregation with Coarse-to-fine Language Guidance for Self-supervised Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6678-6692} }
GaussianSpeech: Audio-Driven Personalized 3D Gaussian Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Aneja_2025_ICCV, author = {Aneja, Shivangi and Sevastopolsky, Artem and Kirschstein, Tobias and Thies, Justus and Dai, Angela and Nie{\ss}ner, Matthias}, title = {GaussianSpeech: Audio-Driven Personalized 3D Gaussian Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13065-13075} }
MonoFusion: Sparse-View 4D Reconstruction via Monocular Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zihan and Tan, Jeff and Khurana, Tarasha and Peri, Neehar and Ramanan, Deva}, title = {MonoFusion: Sparse-View 4D Reconstruction via Monocular Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8252-8263} }
SIGMAN: Scaling 3D Human Gaussian Generation with Millions of Assets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yuhang and Liu, Fengqi and Lu, Yixing and Zhao, Qin and Wu, Pingyu and Zhai, Wei and Yi, Ran and Cao, Yang and Ma, Lizhuang and Zha, Zheng-Jun and Dong, Junting}, title = {SIGMAN: Scaling 3D Human Gaussian Generation with Millions of Assets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5122-5133} }
Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Decatur_2025_ICCV, author = {Decatur, Dale and Groueix, Thibault and Yifan, Wang and Hanocka, Rana and Kim, Vladimir and Gadelha, Matheus}, title = {Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16482-16491} }
Dita: Scaling Diffusion Transformer for Generalist Vision-Language-Action Policy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2025_ICCV, author = {Hou, Zhi and Zhang, Tianyi and Xiong, Yuwen and Duan, Haonan and Pu, Hengjun and Tong, Ronglei and Zhao, Chengyang and Zhu, Xizhou and Qiao, Yu and Dai, Jifeng and Chen, Yuntao}, title = {Dita: Scaling Diffusion Transformer for Generalist Vision-Language-Action Policy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7686-7697} }
Tree-NeRV: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiancheng and Zhan, Yifan and Zhu, Qingtian and Ma, Mingze and Niu, Muyao and Wan, Zunian and Ji, Xiang and Zheng, Yinqiang}, title = {Tree-NeRV: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15076-15085} }
GLEAM: Enhanced Transferable Adversarial Attacks for Vision-Language Pre-training Models via Global-Local Transformations-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yunqi and Ouyang, Xue and Cui, Xiaohui}, title = {GLEAM: Enhanced Transferable Adversarial Attacks for Vision-Language Pre-training Models via Global-Local Transformations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1665-1674} }
UniMLVG: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Rui and Wu, Zehuan and Liu, Yichen and Guo, Yuxin and Ni, Jingcheng and Xia, Haifeng and Xia, Siyu}, title = {UniMLVG: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25453-25463} }
CaliMatch: Adaptive Calibration for Improving Safe Semi-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2025_ICCV, author = {Bae, Jinsoo and Kim, Seoung Bum and Do, Hyungrok}, title = {CaliMatch: Adaptive Calibration for Improving Safe Semi-supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2867-2876} }
CogCM: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Feixiang and Yang, Shuang and Shan, Shiguang and Chen, Xilin}, title = {CogCM: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21408-21418} }
Scalable Ranked Preference Optimization for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karthik_2025_ICCV, author = {Karthik, Shyamgopal and Coskun, Huseyin and Akata, Zeynep and Tulyakov, Sergey and Ren, Jian and Kag, Anil}, title = {Scalable Ranked Preference Optimization for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18399-18410} }
Aligning Vision to Language: Annotation-Free Multimodal Knowledge Graph Construction for Enhanced LLMs Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Junming and Meng, Siyuan and Gao, Yanting and Mao, Song and Cai, Pinlong and Yan, Guohang and Chen, Yirong and Bian, Zilin and Wang, Ding and Shi, Botian}, title = {Aligning Vision to Language: Annotation-Free Multimodal Knowledge Graph Construction for Enhanced LLMs Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {981-992} }
QR-LoRA: Efficient and Disentangled Fine-tuning via QR Decomposition for Customized Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Jiahui and Ma, Yongjia and Di, Donglin and Cui, Jianxun and Li, Hao and Chen, Wei and Xie, Yan and Yang, Xun and Zuo, Wangmeng}, title = {QR-LoRA: Efficient and Disentangled Fine-tuning via QR Decomposition for Customized Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17587-17597} }
Uncertainty-Aware Diffusion-Guided Refinement of 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bose_2025_ICCV, author = {Bose, Sarosij and Dutta, Arindam and Nag, Sayak and Zhang, Junge and Li, Jiachen and Karydis, Konstantinos and Roy-Chowdhury, Amit K.}, title = {Uncertainty-Aware Diffusion-Guided Refinement of 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28271-28281} }
GestureLSM: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Pinxin and Song, Luchuan and Huang, Junhua and Liu, Haiyang and Xu, Chenliang}, title = {GestureLSM: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10929-10939} }
LLaFEA: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in LMMs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hanyu and Lee, Gim Hee}, title = {LLaFEA: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in LMMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22294-22304} }
GCAV: A Global Concept Activation Vector Framework for Cross-Layer Consistency in Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Zhenghao and Sinha, Sanchit and Xiong, Guangzhi and Zhang, Aidong}, title = {GCAV: A Global Concept Activation Vector Framework for Cross-Layer Consistency in Interpretability}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {614-623} }
FastPoint: Accelerating 3D Point Cloud Model Inference via Sample Point Distance Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Donghyun and Jeong, Dawoon and Lee, Jae W. and Yoon, Hongil}, title = {FastPoint: Accelerating 3D Point Cloud Model Inference via Sample Point Distance Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25114-25123} }
GSRecon: Efficient Generalizable Gaussian Splatting for Surface Reconstruction from Sparse Views-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Hang and Hui, Le and Qian, Jianjun and Xie, Jin and Yang, Jian}, title = {GSRecon: Efficient Generalizable Gaussian Splatting for Surface Reconstruction from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25346-25356} }
WAVE: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Jiwoo and Choi, Tae Eun and Jun, Youngjun and Hwang, Seong Jae}, title = {WAVE: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11906-11915} }
Blended Point Cloud Diffusion for Localized Text-guided Shape Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sella_2025_ICCV, author = {Sella, Etai and Atia, Noam and Mokady, Ron and Averbuch-Elor, Hadar}, title = {Blended Point Cloud Diffusion for Localized Text-guided Shape Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19119-19129} }
Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Przewiezlikowski_2025_ICCV, author = {Przewi\k{e}\'zlikowski, Marcin and Balestriero, Randall and Jasi\'nski, Wojciech and \'Smieja, Marek and Zieli\'nski, Bartosz}, title = {Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23442-23452} }
AnimateAnyMesh: A Feed-Forward 4D Foundation Model for Text-Driven Universal Mesh Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Zijie and Yu, Chaohui and Wang, Fan and Bai, Xiang}, title = {AnimateAnyMesh: A Feed-Forward 4D Foundation Model for Text-Driven Universal Mesh Animation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13557-13568} }
PhysRig: Differentiable Physics-Based Skinning and Rigging Framework for Realistic Articulated Object Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hao and Xu, Haolan and Feng, Chun and Jampani, Varun and Ahuja, Narendra}, title = {PhysRig: Differentiable Physics-Based Skinning and Rigging Framework for Realistic Articulated Object Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6609-6620} }
SpikePack: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Guobin and Li, Jindong and Li, Tenglong and Zhao, Dongcheng and Zeng, Yi}, title = {SpikePack: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23385-23395} }
Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhuokun and Fan, Jugang and Yu, Zhuowei and Zhuang, Bohan and Tan, Mingkui}, title = {Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17140-17149} }
Efficient Track Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2025_ICCV, author = {Xiong, Yunyang and Zhou, Chong and Xiang, Xiaoyu and Wu, Lemeng and Zhu, Chenchen and Liu, Zechun and Suri, Saksham and Varadarajan, Balakrishnan and Akula, Ramya and Iandola, Forrest and Krishnamoorthi, Raghuraman and Soran, Bilge and Chandra, Vikas}, title = {Efficient Track Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11513-11524} }
VFlowOpt: A Token Pruning Framework for LMMs with Visual Information Flow-Guided Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Sihan and Xu, Runsen and Cui, Chenhang and Wang, Tai and Lin, Dahua and Pang, Jiangmiao}, title = {VFlowOpt: A Token Pruning Framework for LMMs with Visual Information Flow-Guided Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23924-23934} }
CameraCtrl II: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Hao and Yang, Ceyuan and Lin, Shanchuan and Xu, Yinghao and Wei, Meng and Gui, Liangke and Zhao, Qi and Wetzstein, Gordon and Jiang, Lu and Li, Hongsheng}, title = {CameraCtrl II: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13416-13426} }
Text-guided Visual Prompt DINO for Generic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Yuchen and Sun, Chong and Fu, Canmiao and Huang, Zhipeng and Yuan, Chun and Li, Chen}, title = {Text-guided Visual Prompt DINO for Generic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21288-21298} }
Open-set Cross Modal Generalization via Multimodal Unified Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Hai and Xia, Yan and Wang, Shulei and Wang, Hanting and Fang, Minghui and Ji, Shengpeng and Zhou, Sashuai and Jin, Tao and Zhao, Zhou}, title = {Open-set Cross Modal Generalization via Multimodal Unified Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {541-551} }
PLADIS: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Kwanyoung and Sim, Byeongsu}, title = {PLADIS: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16238-16248} }
GT-Loc: Unifying When and Where in Images Through a Joint Embedding Space-
[pdf]
[supp]
[bibtex]@InProceedings{Shatwell_2025_ICCV, author = {Shatwell, David G. and Dave, Ishan Rajendrakumar and Swetha, Sirnam and Shah, Mubarak}, title = {GT-Loc: Unifying When and Where in Images Through a Joint Embedding Space}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1-11} }
Robust Test-Time Adaptation for Single Image Denoising Using Deep Gaussian Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Qing and Liang, Pengwei and Zhou, Xiong and Ma, Jiayi and Jiang, Junjun and Peng, Zhe}, title = {Robust Test-Time Adaptation for Single Image Denoising Using Deep Gaussian Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11230-11240} }
MATE: Motion-Augmented Temporal Consistency for Event-based Point Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Han and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-jun}, title = {MATE: Motion-Augmented Temporal Consistency for Event-based Point Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8340-8349} }
Diffusion-Based Extreme High-speed Scenes Reconstruction with the Complementary Vision Sensor-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_ICCV, author = {Meng, Yapeng and Lin, Yihan and Wang, Taoyi and Chen, Yuguo and Wang, Lijian and Zhao, Rong}, title = {Diffusion-Based Extreme High-speed Scenes Reconstruction with the Complementary Vision Sensor}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5701-5710} }
RnGCam: High-speed video from rolling & global shutter measurements-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tandi_2025_ICCV, author = {Tandi, Kevin and Dai, Xiang and Talegaonkar, Chinmay and Mishne, Gal and Antipa, Nick}, title = {RnGCam: High-speed video from rolling \& global shutter measurements}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8830-8840} }
OV3D-CG: Open-vocabulary 3D Instance Segmentation with Contextual Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Mingquan and He, Chen and Wang, Ruiping and Chen, Xilin}, title = {OV3D-CG: Open-vocabulary 3D Instance Segmentation with Contextual Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5305-5314} }
FG-OrIU: Towards Better Forgetting via Feature-Gradient Orthogonality for Incremental Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Qian and Tu, JiaHang and Kang, Mintong and Zhao, Hanbin and Zhang, Chao and Qian, Hui}, title = {FG-OrIU: Towards Better Forgetting via Feature-Gradient Orthogonality for Incremental Unlearning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {1957-1967} }
InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yifan and Ren, Xuanchi and Yang, Jiawei and Shen, Tianchang and Wu, Zhangjie and Gao, Jun and Wang, Yue and Chen, Siheng and Chen, Mike and Fidler, Sanja and Huang, Jiahui}, title = {InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27272-27283} }
Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Guibao and Wang, Luozhou and Lin, Jiantao and Ge, Wenhang and Zhang, Chaozhe and Tao, Xin and Zhang, Di and Wan, Pengfei and Chen, Guangyong and Li, Yijun and Chen, Ying-cong}, title = {Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15437-15446} }
After the Party: Navigating the Mapping From Color to Ambient Lighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasluianu_2025_ICCV, author = {Vasluianu, Florin-Alexandru and Seizinger, Tim and Wu, Zongwei and Timofte, Radu}, title = {After the Party: Navigating the Mapping From Color to Ambient Lighting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9218-9229} }
Disentangled Clothed Avatar Generation with Layered Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weitian and Yan, Yichao and Wu, Sijing and Liao, Manwen and Yang, Xiaokang}, title = {Disentangled Clothed Avatar Generation with Layered Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11327-11338} }
Teleportraits: Training-Free People Insertion into Any Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Jialu and Joseph, K. J. and De La Torre, Fernando}, title = {Teleportraits: Training-Free People Insertion into Any Scene}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18866-18875} }
DepthSync: Diffusion Guidance-Based Depth Synchronization for Scale- and Geometry-Consistent Video Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Yue-Jiang and Zhao, Wang and Xu, Jiale and Shan, Ying and Zhang, Song-Hai}, title = {DepthSync: Diffusion Guidance-Based Depth Synchronization for Scale- and Geometry-Consistent Video Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5415-5425} }
InfoBridge: Balanced Multimodal Integration through Conditional Dependency Modeling-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Chenxin and Liu, Yifan and Pan, Panwang and Liu, Hengyu and Liu, Xinyu and Li, Wuyang and Wang, Cheng and Yu, Weihao and Lin, Yiyang and Yuan, Yixuan}, title = {InfoBridge: Balanced Multimodal Integration through Conditional Dependency Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {393-404} }
Towards Explicit Exoskeleton for the Reconstruction of Complicated 3D Human Avatars-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_ICCV, author = {Zhan, Yifan and Zhu, Qingtian and Niu, Muyao and Ma, Mingze and Zhao, Jiancheng and Zhong, Zhihang and Sun, Xiao and Qiao, Yu and Zheng, Yinqiang}, title = {Towards Explicit Exoskeleton for the Reconstruction of Complicated 3D Human Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14259-14269} }
CABLD: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Salari_2025_ICCV, author = {Salari, Soorena and Harirpoush, Arash and Rivaz, Hassan and Xiao, Yiming}, title = {CABLD: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20991-21002} }
One Object, Multiple Lies: A Benchmark for Cross-task Adversarial Attack on Unified Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiale and Jiang, Xinyang and Gao, Junyao and Xue, Yuhao and Zhao, Cairong}, title = {One Object, Multiple Lies: A Benchmark for Cross-task Adversarial Attack on Unified Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {187-196} }
A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2025_ICCV, author = {Ho, Chi-Jui and Belhe, Yash and Rotenberg, Steve and Ramamoorthi, Ravi and Li, Tzu-Mao and Antipa, Nicholas}, title = {A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28042-28051} }
CHROME: Clothed Human Reconstruction with Occlusion-Resilience and Multiview-Consistency from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutta_2025_ICCV, author = {Dutta, Arindam and Zheng, Meng and Gao, Zhongpai and Planche, Benjamin and Choudhuri, Anwesa and Chen, Terrence and Roy-Chowdhury, Amit K. and Wu, Ziyan}, title = {CHROME: Clothed Human Reconstruction with Occlusion-Resilience and Multiview-Consistency from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9124-9135} }
H3R: Hybrid Multi-view Correspondence for Generalizable 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Heng and Zhu, Linchao and Zhao, Na}, title = {H3R: Hybrid Multi-view Correspondence for Generalizable 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7655-7665} }
Capturing head avatar with hand contacts from a monocular video-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Haonan and Zheng, Yufeng and Song, Jie}, title = {Capturing head avatar with hand contacts from a monocular video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13099-13108} }
DoppDrive: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haitman_2025_ICCV, author = {Haitman, Yuval and Bialer, Oded}, title = {DoppDrive: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26085-26094} }
3D Gaussian Map with Open-Set Semantic Grouping for Vision-Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Jianzhe and Liu, Rui and Wang, Wenguan}, title = {3D Gaussian Map with Open-Set Semantic Grouping for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9252-9262} }
M-SpecGene: Generalized Foundation Model for RGBT Multispectral Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Kailai and Yang, Fuqiang and Wang, Shixian and Wen, Bihan and Zi, Chongde and Chen, Linsen and Shen, Qiu and Cao, Xun}, title = {M-SpecGene: Generalized Foundation Model for RGBT Multispectral Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7861-7872} }
YOLOE: Real-Time Seeing Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ao and Liu, Lihao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang}, title = {YOLOE: Real-Time Seeing Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24591-24602} }
Joint Learning of Pose Regression and Denoising Diffusion with Score Scaling Sampling for Category-level 6D Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Seunghyun and Kim, Tae-Kyun}, title = {Joint Learning of Pose Regression and Denoising Diffusion with Score Scaling Sampling for Category-level 6D Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {5757-5768} }
Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Qizhe and Cheng, Aosong and Lu, Ming and Zhang, Renrui and Zhuo, Zhiyong and Cao, Jiajun and Guo, Shaobo and She, Qi and Zhang, Shanghang}, title = {Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in VLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20857-20867} }
Event-guided HDR Reconstruction with Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yixin and Zhang, Jiawei and Zhang, Yang and Wei, Yunxuan and Zou, Dongqing and Ren, Jimmy S. and Shi, Boxin}, title = {Event-guided HDR Reconstruction with Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11787-11796} }
Continual Adaptation: Environment-Conditional Parameter Generation for Object Detection in Dynamic Scenarios-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Deng and Wu, Aming and Li, Yang and Wang, Yaowei and Han, Yahong}, title = {Continual Adaptation: Environment-Conditional Parameter Generation for Object Detection in Dynamic Scenarios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4434-4443} }
CATP-LLM: Empowering Large Language Models for Cost-Aware Tool Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Duo and Wang, Jinghe and Meng, Yuan and Zhang, Yanning and Sun, Le and Wang, Zhi}, title = {CATP-LLM: Empowering Large Language Models for Cost-Aware Tool Planning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8699-8709} }
MoMaps: Semantics-Aware Scene Motion Generation with Motion Maps-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Jiahui and Genova, Kyle and Kopanas, George and Snavely, Noah and Guibas, Leonidas}, title = {MoMaps: Semantics-Aware Scene Motion Generation with Motion Maps}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10022-10031} }
WarpHE4D: Dense 4D Head Map toward Full Head Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Yun_2025_ICCV, author = {Yun, Jongseob and Kwon, Yong-Hoon and Park, Min-Gyu and Kang, Ju-Mi and Lee, Min-Ho and Chang, Inho and Yoon, Ju Hong and Yoon, Kuk-Jin}, title = {WarpHE4D: Dense 4D Head Map toward Full Head Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11480-11490} }
Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Hongyang and Liu, Shuaizheng and Yuan, Chun and Zhang, Lei}, title = {Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18640-18650} }
TRKT: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zhu and Lei, Ting and Li, Zhimin and Wang, Guan and Chen, Qingchao and Peng, Yuxin and Liu, Yang}, title = {TRKT: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15812-15821} }
Progressive Distribution Bridging: Unsupervised Adaptation for Large-scale Pre-trained Models via Adaptive Auxiliary Data-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Weinan and Zhang, Yixin and Wang, Zilei}, title = {Progressive Distribution Bridging: Unsupervised Adaptation for Large-scale Pre-trained Models via Adaptive Auxiliary Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3280-3292} }
Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_ICCV, author = {Zhan, Yufei and Zheng, Shurong and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Tang, Ming and Wang, Jinqiao}, title = {Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22947-22957} }
TARO: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ton_2025_ICCV, author = {Ton, Tri and Hong, Ji Woo and Yoo, Chang D.}, title = {TARO: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14228-14237} }
VPR-Cloak: A First Look at Privacy Cloak Against Visual Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Shuting and Chen, Mingzhi and Lu, Feng and Yu, Hao and Li, Guanghao and Wu, Zhe and Tang, Ming and Yuan, Chun}, title = {VPR-Cloak: A First Look at Privacy Cloak Against Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {7197-7208} }
SDFormer: Vision-based 3D Semantic Scene Completion via SAM-assisted Dual-channel Voxel Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Yujie and Pi, Huilong and Zhang, Jiapeng and Qin, Yunchuan and Tang, Zhuo and Li, Kenli and Li, Ruihui}, title = {SDFormer: Vision-based 3D Semantic Scene Completion via SAM-assisted Dual-channel Voxel Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26837-26847} }
ModSkill: Physical Character Skill Modularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yiming and Dou, Zhiyang and Liu, Lingjie}, title = {ModSkill: Physical Character Skill Modularization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12394-12404} }
Improving Multimodal Learning via Imbalanced Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Shicai and Luo, Chunbo and Luo, Yang}, title = {Improving Multimodal Learning via Imbalanced Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2250-2259} }
INTER: Mitigating Hallucination in Large Vision-Language Models by Interaction Guidance Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Xin and Dong, Shichao and Wang, Jin and Huang, Jing and Zhou, Li and Sun, Zenghui and Jing, Lihua and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo}, title = {INTER: Mitigating Hallucination in Large Vision-Language Models by Interaction Guidance Sampling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {2534-2544} }
ForestFormer3D: A Unified Framework for End-to-End Segmentation of Forest LiDAR 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Binbin and Wielgosz, Maciej and Puliti, Stefano and Kr{\'a}l, Kamil and Kr{\r{u}}{\v{c}}ek, Martin and Missarov, Azim and Astrup, Rasmus}, title = {ForestFormer3D: A Unified Framework for End-to-End Segmentation of Forest LiDAR 3D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24717-24727} }
Medical World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yijun and Wang, Zhao-Yang and Liu, Qiuping and Sun, Shuwen and Wang, Kang and Chellappa, Rama and Zhou, Zongwei and Yuille, Alan and Zhu, Lei and Zhang, Yu-Dong and Chen, Jieneng}, title = {Medical World Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {8319-8329} }
AccidentalGS: 3D Gaussian Splatting from Accidental Camera Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_ICCV, author = {Mao, Mao and Shen, Xujie and Chen, Guyuan and Zhao, Boming and Hu, Jiarui and Bao, Hujun and Cui, Zhaopeng}, title = {AccidentalGS: 3D Gaussian Splatting from Accidental Camera Motion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27445-27455} }
RoMo: Robust Motion Segmentation Improves Structure from Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goli_2025_ICCV, author = {Goli, Lily and Sabour, Sara and Matthews, Mark and Brubaker, Marcus A. and Lagun, Dmitry and Jacobson, Alec and Fleet, David J. and Saxena, Saurabh and Tagliasacchi, Andrea}, title = {RoMo: Robust Motion Segmentation Improves Structure from Motion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6155-6164} }
CAFA: a Controllable Automatic Foley Artist-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benita_2025_ICCV, author = {Benita, Roi and Finkelson, Michael and Halperin, Tavi and Sterkin, Gleb and Adi, Yossi}, title = {CAFA: a Controllable Automatic Foley Artist}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15917-15926} }
Neuroverse3D: Developing In-Context Learning Universal Model for Neuroimaging in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Jiesi and Peng, Hanyang and Yang, Yanwu and Guo, Xutao and Shang, Yang and Shi, Pengcheng and Ye, Chenfei and Ma, Ting}, title = {Neuroverse3D: Developing In-Context Learning Universal Model for Neuroimaging in 3D}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21721-21731} }
ProSAM: Enhancing the Robustness of SAM-based Visual Reference Segmentation with Probabilistic Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xiaoqi and Sebastian, Clint and He, Wenbin and Ren, Liu}, title = {ProSAM: Enhancing the Robustness of SAM-based Visual Reference Segmentation with Probabilistic Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20487-20496} }
TF-TI2I: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hsiao_2025_ICCV, author = {Hsiao, Teng-Fang and Ruan, Bo-Kai and Wu, Yi-Lun and Lin, Tzu-Ling and Shuai, Hong-Han}, title = {TF-TI2I: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18377-18387} }
Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization-
[pdf]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Weiying and Meng, Zihan and Ma, Jitao and Guo, Wenjin and Li, Haowei and Qin, Haonan and Fang, Leyuan and Li, Yunsong}, title = {Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24615-24624} }
CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_ICCV, author = {Gong, Zhefei and Ding, Pengxiang and Lyu, Shangke and Huang, Siteng and Sun, Mingyang and Zhao, Wei and Fan, Zhaoxin and Wang, Donglin}, title = {CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13460-13470} }
SAMora: Enhancing SAM through Hierarchical Self-Supervised Pre-Training for Medical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Shuhang and Yuan, Hangjie and Liu, Pengwei and Gu, Hanxue and Feng, Tao and Ni, Dong}, title = {SAMora: Enhancing SAM through Hierarchical Self-Supervised Pre-Training for Medical Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21209-21219} }
MagicMotion: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Quanhao and Xing, Zhen and Wang, Rui and Zhang, Hui and Dai, Qi and Wu, Zuxuan}, title = {MagicMotion: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12112-12123} }
Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2025_ICCV, author = {Xia, Zhi-Wei and Lin, Kun-Yu and Li, Yuan-Ming and Huang, Wei-Jin and Tan, Xian-Tuo and Zheng, Wei-Shi}, title = {Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12894-12903} }
PathDiff: Histopathology Image Synthesis with Unpaired Text and Mask Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhosale_2025_ICCV, author = {Bhosale, Mahesh and Wasi, Abdul and Zhai, Yuanhao and Tian, Yunjie and Border, Samuel and Xi, Nan and Sarder, Pinaki and Yuan, Junsong and Doermann, David and Gong, Xuan}, title = {PathDiff: Histopathology Image Synthesis with Unpaired Text and Mask Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22415-22424} }
Robust Low-light Scene Restoration via Illumination Transition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ze and Zhang, Feng and Zhu, Xiatian and Zhang, Meng and Zhou, Yanghong and Mok, P. Y.}, title = {Robust Low-light Scene Restoration via Illumination Transition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6188-6197} }
GVDepth: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Koledic_2025_ICCV, author = {Koledi{\'c}, Karlo and Petrovi{\'c}, Luka and Markovi{\'c}, Ivan and Petrovi{\'c}, Ivan}, title = {GVDepth: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26126-26135} }
VISION-XL: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kwon_2025_ICCV, author = {Kwon, Taesung and Ye, Jong Chul}, title = {VISION-XL: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10465-10474} }
Robin3D: Improving 3D Large Language Model via Robust Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Weitai and Huang, Haifeng and Shang, Yuzhang and Shah, Mubarak and Yan, Yan}, title = {Robin3D: Improving 3D Large Language Model via Robust Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {3905-3915} }
Mixture of Experts Guided by Gaussian Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Amicantonio_2025_ICCV, author = {D'Amicantonio, Giacomo and Majhi, Snehashis and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bremond, Francois and Bondarev, Egor}, title = {Mixture of Experts Guided by Gaussian Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10275-10285} }
ProJudge: A Multi-Modal Multi-Discipline Benchmark and Instruction-Tuning Dataset for MLLM-based Process Judges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ai_2025_ICCV, author = {Ai, Jiaxin and Zhou, Pengfei and Xu, Zhaopan and Li, Ming and Zhang, Fanrui and Li, Zizhen and Sun, Jianwen and Feng, Yukang and Huang, Baojin and Wang, Zhongyuan and Zhang, Kaipeng}, title = {ProJudge: A Multi-Modal Multi-Discipline Benchmark and Instruction-Tuning Dataset for MLLM-based Process Judges}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4681-4690} }
GloPER: Unsupervised Animal Pattern Extraction from Local Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Bowen and Koh, Yun Sing and Dobbie, Gillian}, title = {GloPER: Unsupervised Animal Pattern Extraction from Local Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6519-6529} }
One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chenhao and Zhang, Jieyu and Salehi, Mohammadreza and Gao, Ziqi and Iyengar, Vishnu and Kobori, Norimasa and Kong, Quan and Krishna, Ranjay}, title = {One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23156-23166} }
Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Xinlong and Yu, Hongwei and Li, Jiawei and Li, Feifan and Shang, Yu and Zou, Bochao and Ma, Huimin and Chen, Jiansheng}, title = {Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28483-28492} }
COSTARR: Consolidated Open Set Technique with Attenuation for Robust Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rabinowitz_2025_ICCV, author = {Rabinowitz, Ryan and Cruz, Steve and Scheirer, Walter and Boult, Terrance E.}, title = {COSTARR: Consolidated Open Set Technique with Attenuation for Robust Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {4146-4155} }
Agreement aware and dissimilarity oriented GLOM-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Ru and Song, Yan and Zhang, Yang and Hu, Yanling and Yu, Hui}, title = {Agreement aware and dissimilarity oriented GLOM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24351-24359} }
Att-Adapter: A Robust and Precise Domain-Specific Multi-Attributes T2I Diffusion Adapter via Conditional Variational Autoencoder-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, Wonwoong and Chen, Yan-Ying and Klenk, Matthew and Inouye, David I. and Zhang, Yanxia}, title = {Att-Adapter: A Robust and Precise Domain-Specific Multi-Attributes T2I Diffusion Adapter via Conditional Variational Autoencoder}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15626-15635} }
PERSONA: Personalized Whole-Body 3D Avatar with Pose-Driven Deformations from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sim_2025_ICCV, author = {Sim, Geonhee and Moon, Gyeongsik}, title = {PERSONA: Personalized Whole-Body 3D Avatar with Pose-Driven Deformations from a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12670-12680} }
PanoSplatt3R: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Jiahui and Xiang, Mochu and Zhu, Jiajun and Dai, Yuchao}, title = {PanoSplatt3R: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28959-28969} }
CCMNet: Leveraging Calibrated Color Correction Matrices for Cross-Camera Color Constancy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Dongyoung and Afifi, Mahmoud and Kim, Dongyun and Brown, Michael S. and Kim, Seon Joo}, title = {CCMNet: Leveraging Calibrated Color Correction Matrices for Cross-Camera Color Constancy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6198-6208} }
Seeing 3D Through 2D Lenses: 3D Few-Shot Class-Incremental Learning via Cross-Modal Geometric Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Tuo and Xu, Xuemiao and Liu, Bangzhen and Li, Jinyi and Li, Yong and He, Shengfeng}, title = {Seeing 3D Through 2D Lenses: 3D Few-Shot Class-Incremental Learning via Cross-Modal Geometric Rectification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {6761-6771} }
Secure On-Device Video OOD Detection Without Backpropagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shawn and Cai, Peilin and Zhou, Yuxiao and Ni, Zhiyu and Liang, Renjie and Qin, You and Nian, Yi and Tu, Zhengzhong and Hu, Xiyang and Zhao, Yue}, title = {Secure On-Device Video OOD Detection Without Backpropagation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {112-121} }
SmolDocling: An ultra-compact vision-language model for end-to-end multi-modal document conversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nassar_2025_ICCV,
  author    = {Nassar, Ahmed and Omenetti, Matteo and Lysak, Maksym and Livathinos, Nikolaos and Auer, Christoph and Morin, Lucas and de Lima, Rafael Teixeira and Kim, Yusik and Gurbuz, A. Said and Dolfi, Michele and Staar, Peter W. J.},
  title     = {{SmolDocling}: An ultra-compact vision-language model for end-to-end multi-modal document conversion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21972--21983},
}
Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Wenliang and Barton, Rob and An, Weizhi and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Dan, Abhishek and Sam, Shioulin and Bouyarmane, Karim and Huang, Junzhou},
  title     = {Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22221--22231},
}
Underwater Visual SLAM with Depth Uncertainty and Medium Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Rui and Fan, Sheng and Wang, Wenguan and Yang, Yi},
  title     = {Underwater Visual {SLAM} with Depth Uncertainty and Medium Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {970--980},
}
Blind Video Super-Resolution based on Implicit Kernels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qiang and Jiang, Yuxuan and Zhu, Shuyuan and Zhang, Fan and Bull, David and Zeng, Bing},
  title     = {Blind Video Super-Resolution based on Implicit Kernels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10971--10981},
}
AMDANet: Attention-Driven Multi-Perspective Discrepancy Alignment for RGB-Infrared Image Fusion and Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Haifeng and Tang, Fan and Chen, Zhuo and Chang, Hyung Jin and Gao, Yixing},
  title     = {{AMDANet}: Attention-Driven Multi-Perspective Discrepancy Alignment for {RGB}-Infrared Image Fusion and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10645--10655},
}
Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2025_ICCV,
  author    = {Rahman, Md Ashiqur and Yang, Chiao-An and Cheng, Michael N. and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.},
  title     = {Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10527--10537},
}
Auxiliary Prompt Tuning of Vision-Language Models for Few-Shot Out-of-Distribution Detection-
[pdf]
[bibtex]@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Wenjun and Pang, Guansong and Wang, Zihan and Zheng, Jin and Bai, Xiao},
  title     = {Auxiliary Prompt Tuning of Vision-Language Models for Few-Shot Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {4776--4785},
}
TACO: Taming Diffusion for in-the-wild Video Amodal Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Ruijie and Chen, Yixin and Liu, Yu and Tang, Jiaxiang and Ni, Junfeng and Wan, Diwen and Zeng, Gang and Huang, Siyuan},
  title     = {{TACO}: Taming Diffusion for in-the-wild Video Amodal Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13638--13650},
}
UniGS: Modeling Unitary 3D Gaussians for Novel View Synthesis from Sparse-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiamin and Liu, Kenkun and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei},
  title     = {{UniGS}: Modeling Unitary {3D} {Gaussians} for Novel View Synthesis from Sparse-view Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26241--26251},
}
ObjectGS: Object-aware Scene Reconstruction and Scene Understanding via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Ruijie and Yu, Mulin and Xu, Linning and Jiang, Lihan and Li, Yixuan and Zhang, Tianzhu and Pang, Jiangmiao and Dai, Bo},
  title     = {{ObjectGS}: Object-aware Scene Reconstruction and Scene Understanding via {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {8350--8360},
}
SP2T: Sparse Proxy Attention for Dual-stream Point Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Jiaxu and Zhang, Hong and He, Ziqi and Deng, Yangyan and Wang, Qishu and Yuan, Ding and Yang, Yifan},
  title     = {{SP2T}: Sparse Proxy Attention for Dual-stream Point Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27885--27895},
}
Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yu'ang and Gao, Shuyong and Yan, Fuzhen and Song, Yicheng and Hong, Lingyi and Hu, Junjie and Zhang, Wenqiang},
  title     = {Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13043--13052},
}
Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, An-Lun and Chao, Yu-Wei and Chen, Yi-Ting},
  title     = {Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10375--10385},
}
Back