ICCV 2025 Open Access Repository

Findings of ICCV

AdCorDA: Classifier Refinement via Adversarial Correction and Domain Adaptation: Lulan Shen,

Ali Edalati,

Xiangyu Li,

Brett H. Meyer,

Warren Gross,

James J. Clark; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Lulan and Edalati, Ali and Li, Xiangyu and Meyer, Brett H. and Gross, Warren and Clark, James J.},
  title     = {{AdCorDA}: Classifier Refinement via Adversarial Correction and Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6314--6323},
}
NAS just once: Neural Architecture Search for joint Image-Video Recognition: Sofia Casarin,

Sergio Escalera,

Oswald Lanz; [pdf] [supp]
[bibtex]
@InProceedings{Casarin_2025_ICCV,
  author    = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald},
  title     = {{NAS} just once: Neural Architecture Search for joint Image-Video Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6431--6441},
}
Zero-Shot Subject-Centric Generation for Creative Application Using Entropy Fusion: Kaifeng Zou,

Xiaoyi Feng,

Tao Huang,

Zizhou Huang,

Haihang Zhang,

Yuntao Zou,

Dagang Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Kaifeng and Feng, Xiaoyi and Huang, Tao and Huang, Zizhou and Zhang, Haihang and Zou, Yuntao and Li, Dagang},
  title     = {Zero-Shot Subject-Centric Generation for Creative Application Using Entropy Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6195--6204},
}
DEARLi: Decoupled Enhancement of Recognition and Localization for Semi-supervised Panoptic Segmentation: Ivan Martinović,

Josip Šarić,

Marin Oršić,

Matej Kristan,

Siniša Šegvić; [pdf] [supp]
[bibtex]
@InProceedings{Martinovic_2025_ICCV,
  author    = {Martinovi{\'c}, Ivan and {\v{S}}ari{\'c}, Josip and Or{\v{s}}i{\'c}, Marin and Kristan, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a},
  title     = {{DEARLi}: Decoupled Enhancement of Recognition and Localization for Semi-supervised Panoptic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6268--6279},
}
Scaling Open-Vocabulary Action Detection: Zhen Hao Sia,

Yogesh Singh Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sia_2025_ICCV,
  author    = {Sia, Zhen Hao and Rawat, Yogesh Singh},
  title     = {Scaling Open-Vocabulary Action Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6410--6420},
}
ORXE: Orchestrating Experts for Dynamically Configurable Efficiency: Qingyuan Wang,

Guoxin Wang,

Barry Cardiff,

Deepu John; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qingyuan and Wang, Guoxin and Cardiff, Barry and John, Deepu},
  title     = {{ORXE}: Orchestrating Experts for Dynamically Configurable Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6324--6334},
}
GeoDiff: Geometry-Guided Diffusion for Metric Depth Estimation: Tuan Pham,

Thanh Tung Le,

Xiaohui Xie,

Stephan Mandt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2025_ICCV,
  author    = {Pham, Tuan and Le, Thanh Tung and Xie, Xiaohui and Mandt, Stephan},
  title     = {{GeoDiff}: Geometry-Guided Diffusion for Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6357--6367},
}
BLIP-3: A Family of Open Large Multimodal Models: Le Xue,

Manli Shu,

Anas Awadalla,

Jun Wang,

An Yan,

Senthil Purushwalkam,

Honglu Zhou,

Viraj Prabhu,

Yutong Dai,

Michael S Ryoo,

Shrikant Kendre,

Jieyu Zhang,

Shaoyen Tseng,

Gustavo Adolfo Lujan-Moreno,

Matthew Lyle Olson,

Musashi Hinck,

David Cobbley,

Vasudev Lal,

Can Qin,

Shu Zhang,

Chia-Chih Chen,

Ning Yu,

Juntao Tan,

Tulika Manoj Awalgaonkar,

Shelby Heinecke,

Huan Wang,

Yejin Choi,

Ludwig Schmidt,

Zeyuan Chen,

Silvio Savarese,

Juan Carlos Niebles,

Caiming Xiong,

Ran Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Le and Shu, Manli and Awadalla, Anas and Wang, Jun and Yan, An and Purushwalkam, Senthil and Zhou, Honglu and Prabhu, Viraj and Dai, Yutong and Ryoo, Michael S and Kendre, Shrikant and Zhang, Jieyu and Tseng, Shaoyen and Lujan-Moreno, Gustavo Adolfo and Olson, Matthew Lyle and Hinck, Musashi and Cobbley, David and Lal, Vasudev and Qin, Can and Zhang, Shu and Chen, Chia-Chih and Yu, Ning and Tan, Juntao and Awalgaonkar, Tulika Manoj and Heinecke, Shelby and Wang, Huan and Choi, Yejin and Schmidt, Ludwig and Chen, Zeyuan and Savarese, Silvio and Niebles, Juan Carlos and Xiong, Caiming and Xu, Ran},
  title     = {{BLIP-3}: A Family of Open Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6183--6194},
}
Data Leakage in Visual Datasets: Patrick Ramos,

Ryan Ramos,

Noa Garcia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramos_2025_ICCV,
  author    = {Ramos, Patrick and Ramos, Ryan and Garcia, Noa},
  title     = {Data Leakage in Visual Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6368--6378},
}
Zero-shot Customized Video Editing with Diffusion Feature Transfer: Wei Chen,

Huidong Liu,

Yang Liu,

Chien-Chih Wang,

Moyan Li,

Hongdong Li,

Bryan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Wei and Liu, Huidong and Liu, Yang and Wang, Chien-Chih and Li, Moyan and Li, Hongdong and Wang, Bryan},
  title     = {Zero-shot Customized Video Editing with Diffusion Feature Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6173--6182},
}
LOCAL: Latent Orthonormal Contrastive Learning for Paired Image Classification: Fei Dou,

Jin Lu,

Tan Zhu,

Jinbo Bi; [pdf] [supp]
[bibtex]
@InProceedings{Dou_2025_ICCV,
  author    = {Dou, Fei and Lu, Jin and Zhu, Tan and Bi, Jinbo},
  title     = {{LOCAL}: Latent Orthonormal Contrastive Learning for Paired Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6467--6476},
}
Tuning-Free Multi-Event Long Video Generation via Synchronized Coupled Sampling: Subin Kim,

Seoung Wug Oh,

Jui-Hsien Wang,

Joon-Young Lee,

Jinwoo Shin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Subin and Oh, Seoung Wug and Wang, Jui-Hsien and Lee, Joon-Young and Shin, Jinwoo},
  title     = {Tuning-Free Multi-Event Long Video Generation via Synchronized Coupled Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6477--6488},
}
SynBalance: Harnessing Synthetic Data in Long-tailed Recognition: Zhongyu Jiang,

Jiarui Cai,

Chang Liu,

Dongsheng An,

Jonathan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zhongyu and Cai, Jiarui and Liu, Chang and An, Dongsheng and Wu, Jonathan},
  title     = {{SynBalance}: Harnessing Synthetic Data in Long-tailed Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6335--6344},
}
From Flat to Round: Redefining Brain Decoding with Surface-Based fMRI and Cortex Structure: Sijin Yu,

Zijiao Chen,

Wenxuan Wu,

Shengxian Chen,

Zhongliang Liu,

Jingxin Nie,

Xiaofen Xing,

Xiangmin Xu,

Xin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Sijin and Chen, Zijiao and Wu, Wenxuan and Chen, Shengxian and Liu, Zhongliang and Nie, Jingxin and Xing, Xiaofen and Xu, Xiangmin and Zhang, Xin},
  title     = {From Flat to Round: Redefining Brain Decoding with Surface-Based {fMRI} and Cortex Structure},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6248--6257},
}
DNF-Avatar: Distilling Neural Fields for Real-time Animatable Avatar Relighting: Zeren Jiang,

Shaofei Wang,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@Comment{Key suffixed with _b: Jiang_2025_ICCV is already used by the SynBalance entry earlier in this file, and BibTeX drops repeated keys.}
@InProceedings{Jiang_2025_ICCV_b,
  author    = {Jiang, Zeren and Wang, Shaofei and Tang, Siyu},
  title     = {{DNF-Avatar}: Distilling Neural Fields for Real-time Animatable Avatar Relighting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6442--6453},
}
Multi-Objective Optimization for Deep Neural Network Calibration: Dexter Neo,

Tsuhan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Neo_2025_ICCV,
  author    = {Neo, Dexter and Chen, Tsuhan},
  title     = {Multi-Objective Optimization for Deep Neural Network Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6280--6291},
}
CAM-Seg: A Continuous-valued Embedding Approach for Semantic Image Generation: Masud Ahmed,

Zahid Hasan,

Syed Arefinul Haque,

Abu-Zaher Faridee,

Sanjay Purushotham,

Suya You,

Nirmalya Roy; [pdf] [arXiv]
[bibtex]
@InProceedings{Ahmed_2025_ICCV,
  author    = {Ahmed, Masud and Hasan, Zahid and Haque, Syed Arefinul and Faridee, Abu-Zaher and Purushotham, Sanjay and You, Suya and Roy, Nirmalya},
  title     = {{CAM-Seg}: A Continuous-valued Embedding Approach for Semantic Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6421--6430},
}
SCRAMBLe : Enhancing Multimodal LLM Compositionality with Synthetic Preference Data: Samarth Mishra,

Kate Saenko,

Venkatesh Saligrama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mishra_2025_ICCV,
  author    = {Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh},
  title     = {{SCRAMBLe} : Enhancing Multimodal {LLM} Compositionality with Synthetic Preference Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6292--6302},
}
BadPatch: Diffusion-Based Generation of Physical Adversarial Patches: Zhixiang Wang,

Xingjun Ma,

Yu-Gang Jiang; [pdf] [arXiv]
[bibtex]
@Comment{Key suffixed with _b: Wang_2025_ICCV is already used by the ORXE entry earlier in this file, and BibTeX drops repeated keys.}
@InProceedings{Wang_2025_ICCV_b,
  author    = {Wang, Zhixiang and Ma, Xingjun and Jiang, Yu-Gang},
  title     = {{BadPatch}: Diffusion-Based Generation of Physical Adversarial Patches},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6303--6313},
}
Probing the Representational Power of Sparse Autoencoders in Vision Models: Matthew Lyle Olson,

Musashi Hinck,

Neale Ratzlaff,

Changbai Li,

Phillip Howard,

Vasudev Lal,

Shao-Yen Tseng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Olson_2025_ICCV,
  author    = {Olson, Matthew Lyle and Hinck, Musashi and Ratzlaff, Neale and Li, Changbai and Howard, Phillip and Lal, Vasudev and Tseng, Shao-Yen},
  title     = {Probing the Representational Power of Sparse Autoencoders in Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6226--6236},
}
Multimodal Representation Alignment for Image Generation: Text-Image Interleaved Control Is Easier Than You Think: Liang Chen,

Shuai Bai,

Wenhao Chai,

Weichu Xie,

Haozhe Zhao,

Leon Vinci,

Junyang Lin,

Baobao Chang; [pdf] [arXiv]
[bibtex]
@Comment{Key suffixed with _b: Chen_2025_ICCV is already used by the Zero-shot Customized Video Editing entry earlier in this file, and BibTeX drops repeated keys.}
@InProceedings{Chen_2025_ICCV_b,
  author    = {Chen, Liang and Bai, Shuai and Chai, Wenhao and Xie, Weichu and Zhao, Haozhe and Vinci, Leon and Lin, Junyang and Chang, Baobao},
  title     = {Multimodal Representation Alignment for Image Generation: Text-Image Interleaved Control Is Easier Than You Think},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6205--6215},
}
Video-MMLU: A Massive Multi-Discipline Lecture Understanding Benchmark: Enxin Song,

Wenhao Chai,

Weili Xu,

Jianwen Xie,

Yuxuan Liu,

Gaoang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Enxin and Chai, Wenhao and Xu, Weili and Xie, Jianwen and Liu, Yuxuan and Wang, Gaoang},
  title     = {{Video-MMLU}: A Massive Multi-Discipline Lecture Understanding Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6158--6172},
}
Debias your Large Multi-Modal Model at Test-Time with Non-Contrastive Visual Attribute Steering: Neale Ratzlaff,

Matthew Lyle Olson,

Musashi Hinck,

Shao-Yen Tseng,

Vasudev Lal,

Phillip Howard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ratzlaff_2025_ICCV,
  author    = {Ratzlaff, Neale and Olson, Matthew Lyle and Hinck, Musashi and Tseng, Shao-Yen and Lal, Vasudev and Howard, Phillip},
  title     = {Debias your Large Multi-Modal Model at Test-Time with Non-Contrastive Visual Attribute Steering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6258--6267},
}
CAFE: Unifying Representation and Generation with Contrastive-Autoregressive Finetuning: Hao Yu,

Zhuokai Zhao,

Shen Yan,

Lukasz Korycki,

Jianyu Wang,

Baosheng He,

Jiayi Liu,

Lizhu Zhang,

Xiangjun Fan,

Hanchao Yu; [pdf] [supp] [arXiv]
[bibtex]
@Comment{Key suffixed with _b: Yu_2025_ICCV is already used by the From Flat to Round entry earlier in this file, and BibTeX drops repeated keys.}
@InProceedings{Yu_2025_ICCV_b,
  author    = {Yu, Hao and Zhao, Zhuokai and Yan, Shen and Korycki, Lukasz and Wang, Jianyu and He, Baosheng and Liu, Jiayi and Zhang, Lizhu and Fan, Xiangjun and Yu, Hanchao},
  title     = {{CAFE}: Unifying Representation and Generation with Contrastive-Autoregressive Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6345--6356},
}
IDMR: Towards Instance-Driven Precise Visual Correspondence in Multimodal Retrieval: Bangwei Liu,

Yicheng Bao,

Shaohui Lin,

Xuhong Wang,

Xin Tan,

Yingchun Wang,

Yuan Xie,

Chaochao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bangwei and Bao, Yicheng and Lin, Shaohui and Wang, Xuhong and Tan, Xin and Wang, Yingchun and Xie, Yuan and Lu, Chaochao},
  title     = {{IDMR}: Towards Instance-Driven Precise Visual Correspondence in Multimodal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6379--6388},
}
Similarity-Aware Selective State-Space Modeling for Semantic Correspondence: Seungwook Kim,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@Comment{Key suffixed with _b: Kim_2025_ICCV is already used by the Tuning-Free Multi-Event Long Video Generation entry earlier in this file, and BibTeX drops repeated keys.}
@InProceedings{Kim_2025_ICCV_b,
  author    = {Kim, Seungwook and Cho, Minsu},
  title     = {Similarity-Aware Selective State-Space Modeling for Semantic Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6147--6157},
}
MIDAS: Modeling Ground-Truth Distributions with Dark Knowledge for Domain Generalized Stereo Matching: Peng Xu,

Zhiyu Xiang,

Jingyun Fu,

Tianyu Pu,

Hanzhi Zhong,

Eryun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Peng and Xiang, Zhiyu and Fu, Jingyun and Pu, Tianyu and Zhong, Hanzhi and Liu, Eryun},
  title     = {{MIDAS}: Modeling Ground-Truth Distributions with Dark Knowledge for Domain Generalized Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6389--6399},
}
CST Anti-UAV: A Thermal Infrared Benchmark for Tiny UAV Tracking in Complex Scenes: Bin Xie,

Congxuan Zhang,

Fagan Wang,

Peng Liu,

Feng Lu,

Zhen Chen,

Weiming Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Bin and Zhang, Congxuan and Wang, Fagan and Liu, Peng and Lu, Feng and Chen, Zhen and Hu, Weiming},
  title     = {{CST} {Anti-UAV}: A Thermal Infrared Benchmark for Tiny {UAV} Tracking in Complex Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6216--6225},
}
OpenInsGaussian: Open-vocabulary Instance Gaussian Segmentation with Context-aware Cross-view Fusion: Tianyu Huang,

Runnan Chen,

Dongting Hu,

Fengming Huang,

Mingming Gong,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Tianyu and Chen, Runnan and Hu, Dongting and Huang, Fengming and Gong, Mingming and Liu, Tongliang},
  title     = {{OpenInsGaussian}: Open-vocabulary Instance {Gaussian} Segmentation with Context-aware Cross-view Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6400--6409},
}
SPIE: Semantic and Structural Post-Training of Image Editing Diffusion Models with AI feedback: Elior Benarous,

Yilun Du,

Heng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benarous_2025_ICCV,
  author    = {Benarous, Elior and Du, Yilun and Yang, Heng},
  title     = {{SPIE}: Semantic and Structural Post-Training of Image Editing Diffusion Models with {AI} feedback},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6454--6466},
}
M3DocVQA: Multi-modal Multi-page Multi-document Understanding: Jaemin Cho,

Debanjan Mahata,

Ozan Irsoy,

Yujie He,

Mohit Bansal; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Jaemin and Mahata, Debanjan and Irsoy, Ozan and He, Yujie and Bansal, Mohit},
  title     = {{M3DocVQA}: Multi-modal Multi-page Multi-document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6237--6247},
}