Findings of ICCV
Data Leakage in Visual Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramos_2025_ICCV,
  author    = {Ramos, Patrick and Ramos, Ryan and Garcia, Noa},
  title     = {Data Leakage in Visual Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6309--6319}
}
From Flat to Round: Redefining Brain Decoding with Surface-Based fMRI and Cortex Structure-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Sijin and Chen, Zijiao and Wu, Wenxuan and Chen, Shengxian and Liu, Zhongliang and Nie, Jingxin and Xing, Xiaofen and Xu, Xiangmin and Zhang, Xin},
  title     = {From Flat to Round: Redefining Brain Decoding with Surface-Based {fMRI} and Cortex Structure},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6189--6198}
}
ORXE: Orchestrating Experts for Dynamically Configurable Efficiency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qingyuan and Wang, Guoxin and Cardiff, Barry and John, Deepu},
  title     = {{ORXE}: Orchestrating Experts for Dynamically Configurable Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6265--6275}
}
LOCAL: Latent Orthonormal Contrastive Learning for Paired Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Dou_2025_ICCV,
  author    = {Dou, Fei and Lu, Jin and Zhu, Tan and Bi, Jinbo},
  title     = {{LOCAL}: Latent Orthonormal Contrastive Learning for Paired Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6408--6417}
}
IDMR: Towards Instance-Driven Precise Visual Correspondence in Multimodal Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bangwei and Bao, Yicheng and Lin, Shaohui and Wang, Xuhong and Tan, Xin and Wang, Yingchun and Xie, Yuan and Lu, Chaochao},
  title     = {{IDMR}: Towards Instance-Driven Precise Visual Correspondence in Multimodal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6320--6329}
}
Scaling Open-Vocabulary Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sia_2025_ICCV,
  author    = {Sia, Zhen Hao and Rawat, Yogesh Singh},
  title     = {Scaling Open-Vocabulary Action Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6351--6361}
}
M3DocVQA: Multi-modal Multi-page Multi-document Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Jaemin and Mahata, Debanjan and Irsoy, Ozan and He, Yujie and Bansal, Mohit},
  title     = {{M3DocVQA}: Multi-modal Multi-page Multi-document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6178--6188}
}
AdCorDA: Classifier Refinement via Adversarial Correction and Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Lulan and Edalati, Ali and Li, Xiangyu and Meyer, Brett H. and Gross, Warren and Clark, James J.},
  title     = {{AdCorDA}: Classifier Refinement via Adversarial Correction and Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6255--6264}
}
GeoDiff: Geometry-Guided Diffusion for Metric Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Pham_2025_ICCV,
  author    = {Pham, Tuan and Le, Thanh Tung and Xie, Xiaohui and Mandt, Stephan},
  title     = {{GeoDiff}: Geometry-Guided Diffusion for Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6298--6308}
}
Video-MMLU: A Massive Multi-Discipline Lecture Understanding Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV,
  author    = {Song, Enxin and Chai, Wenhao and Xu, Weili and Xie, Jianwen and Liu, Yuxuan and Wang, Gaoang},
  title     = {{Video-MMLU}: A Massive Multi-Discipline Lecture Understanding Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6099--6113}
}
SPIE: Semantic and Structural Post-Training of Image Editing Diffusion Models with AI feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benarous_2025_ICCV,
  author    = {Benarous, Elior and Du, Yilun and Yang, Heng},
  title     = {{SPIE}: Semantic and Structural Post-Training of Image Editing Diffusion Models with {AI} feedback},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6395--6407}
}
BadPatch: Diffusion-Based Generation of Physical Adversarial Patches-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV_BadPatch,
  author    = {Wang, Zhixiang and Ma, Xingjun and Jiang, Yu-Gang},
  title     = {{BadPatch}: Diffusion-Based Generation of Physical Adversarial Patches},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6244--6254}
}
% Key disambiguated: Wang_2025_ICCV is already used by the ORXE entry earlier in this file, so this entry was dropped as a repeated entry.
CAFE: Unifying Representation and Generation with Contrastive-Autoregressive Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV_CAFE,
  author    = {Yu, Hao and Zhao, Zhuokai and Yan, Shen and Korycki, Lukasz and Wang, Jianyu and He, Baosheng and Liu, Jiayi and Zhang, Lizhu and Fan, Xiangjun and Yu, Hanchao},
  title     = {{CAFE}: Unifying Representation and Generation with Contrastive-Autoregressive Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6286--6297}
}
% Key disambiguated: Yu_2025_ICCV is already used by the "From Flat to Round" entry earlier in this file, so this entry was dropped as a repeated entry.
NAS just once: Neural Architecture Search for joint Image-Video Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Casarin_2025_ICCV,
  author    = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald},
  title     = {{NAS} just once: Neural Architecture Search for joint Image-Video Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6372--6382}
}
Multi-Objective Optimization for Deep Neural Network Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Neo_2025_ICCV,
  author    = {Neo, Dexter and Chen, Tsuhan},
  title     = {Multi-Objective Optimization for Deep Neural Network Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6221--6232}
}
CAM-Seg: A Continuous-valued Embedding Approach for Semantic Image Generation-
[pdf]
[bibtex]@InProceedings{Ahmed_2025_ICCV,
  author    = {Ahmed, Masud and Hasan, Zahid and Haque, Syed Arefinul and Faridee, Abu-Zaher and Purushotham, Sanjay and You, Suya and Roy, Nirmalya},
  title     = {{CAM-Seg}: A Continuous-valued Embedding Approach for Semantic Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6362--6371}
}
Zero-shot Customized Video Editing with Diffusion Feature Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Wei and Liu, Huidong and Liu, Yang and Wang, Chien-Chih and Li, Moyan and Li, Hongdong and Wang, Bryan},
  title     = {Zero-shot Customized Video Editing with Diffusion Feature Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6114--6123}
}
OpenInsGaussian: Open-vocabulary Instance Gaussian Segmentation with Context-aware Cross-view Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Tianyu and Chen, Runnan and Hu, Dongting and Huang, Fengming and Gong, Mingming and Liu, Tongliang},
  title     = {{OpenInsGaussian}: Open-vocabulary Instance {Gaussian} Segmentation with Context-aware Cross-view Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6341--6350}
}
Probing the Representational Power of Sparse Autoencoders in Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Olson_2025_ICCV,
  author    = {Olson, Matthew Lyle and Hinck, Musashi and Ratzlaff, Neale and Li, Changbai and Howard, Phillip and Lal, Vasudev and Tseng, Shao-Yen},
  title     = {Probing the Representational Power of Sparse Autoencoders in Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6167--6177}
}
SCRAMBLe: Enhancing Multimodal LLM Compositionality with Synthetic Preference Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mishra_2025_ICCV,
  author    = {Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh},
  title     = {{SCRAMBLe}: Enhancing Multimodal {LLM} Compositionality with Synthetic Preference Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6233--6243}
}
CST Anti-UAV: A Thermal Infrared Benchmark for Tiny UAV Tracking in Complex Scenes-
[pdf]
[bibtex]@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Bin and Zhang, Congxuan and Wang, Fagan and Liu, Peng and Lu, Feng and Chen, Zhen and Hu, Weiming},
  title     = {{CST Anti-UAV}: A Thermal Infrared Benchmark for Tiny {UAV} Tracking in Complex Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6157--6166}
}
BLIP-3: A Family of Open Large Multimodal Models-
[pdf]
[bibtex]@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Le and Shu, Manli and Awadalla, Anas and Wang, Jun and Yan, An and Purushwalkam, Senthil and Zhou, Honglu and Prabhu, Viraj and Dai, Yutong and Ryoo, Michael S. and Kendre, Shrikant and Zhang, Jieyu and Tseng, Shaoyen and Lujan-Moreno, Gustavo Adolfo and Olson, Matthew Lyle and Hinck, Musashi and Cobbley, David and Lal, Vasudev and Qin, Can and Zhang, Shu and Chen, Chia-Chih and Yu, Ning and Tan, Juntao and Awalgaonkar, Tulika Manoj and Heinecke, Shelby and Wang, Huan and Choi, Yejin and Schmidt, Ludwig and Chen, Zeyuan and Savarese, Silvio and Niebles, Juan Carlos and Xiong, Caiming and Xu, Ran},
  title     = {{BLIP-3}: A Family of Open Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6124--6135}
}
DNF-Avatar: Distilling Neural Fields for Real-time Animatable Avatar Relighting-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zeren and Wang, Shaofei and Tang, Siyu},
  title     = {{DNF-Avatar}: Distilling Neural Fields for Real-time Animatable Avatar Relighting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6383--6394}
}
Zero-Shot Subject-Centric Generation for Creative Application Using Entropy Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Kaifeng and Feng, Xiaoyi and Huang, Tao and Huang, Zizhou and Zhang, Haihang and Zou, Yuntao and Li, Dagang},
  title     = {Zero-Shot Subject-Centric Generation for Creative Application Using Entropy Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6136--6145}
}
SynBalance: Harnessing Synthetic Data in Long-tailed Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV_SynBalance,
  author    = {Jiang, Zhongyu and Cai, Jiarui and Liu, Chang and An, Dongsheng and Wu, Jonathan},
  title     = {{SynBalance}: Harnessing Synthetic Data in Long-tailed Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6276--6285}
}
% Key disambiguated: Jiang_2025_ICCV is already used by the DNF-Avatar entry earlier in this file, so this entry was dropped as a repeated entry.
Similarity-Aware Selective State-Space Modeling for Semantic Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Seungwook and Cho, Minsu},
  title     = {Similarity-Aware Selective State-Space Modeling for Semantic Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6088--6098}
}
MIDAS: Modeling Ground-Truth Distributions with Dark Knowledge for Domain Generalized Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Peng and Xiang, Zhiyu and Fu, Jingyun and Pu, Tianyu and Zhong, Hanzhi and Liu, Eryun},
  title     = {{MIDAS}: Modeling Ground-Truth Distributions with Dark Knowledge for Domain Generalized Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6330--6340}
}
Tuning-Free Multi-Event Long Video Generation via Synchronized Coupled Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV_TuningFree,
  author    = {Kim, Subin and Oh, Seoung Wug and Wang, Jui-Hsien and Lee, Joon-Young and Shin, Jinwoo},
  title     = {Tuning-Free Multi-Event Long Video Generation via Synchronized Coupled Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6418--6429}
}
% Key disambiguated: Kim_2025_ICCV is already used by the "Similarity-Aware Selective State-Space Modeling" entry earlier in this file, so this entry was dropped as a repeated entry.
DEARLi: Decoupled Enhancement of Recognition and Localization for Semi-supervised Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Martinovic_2025_ICCV,
  author    = {Martinovi{\'c}, Ivan and {\v{S}}ari{\'c}, Josip and Or{\v{s}}i{\'c}, Marin and Kristan, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a},
  title     = {{DEARLi}: Decoupled Enhancement of Recognition and Localization for Semi-supervised Panoptic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6209--6220}
}
% Accented letters are written as BibTeX special characters ({\'c}, {\v{S}}, {\v{s}}) so sorting and labels treat each as a single letter.
Debias your Large Multi-Modal Model at Test-Time with Non-Contrastive Visual Attribute Steering-
[pdf]
[supp]
[bibtex]@InProceedings{Ratzlaff_2025_ICCV,
  author    = {Ratzlaff, Neale and Olson, Matthew Lyle and Hinck, Musashi and Tseng, Shao-Yen and Lal, Vasudev and Howard, Phillip},
  title     = {Debias your Large Multi-Modal Model at Test-Time with Non-Contrastive Visual Attribute Steering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6199--6208}
}
Multimodal Representation Alignment for Image Generation: Text-Image Interleaved Control Is Easier Than You Think-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV_Multimodal,
  author    = {Chen, Liang and Bai, Shuai and Chai, Wenhao and Xie, Weichu and Zhao, Haozhe and Vinci, Leon and Lin, Junyang and Chang, Baobao},
  title     = {Multimodal Representation Alignment for Image Generation: Text-Image Interleaved Control Is Easier Than You Think},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = {October},
  year      = {2025},
  pages     = {6146--6156}
}
% Key disambiguated: Chen_2025_ICCV is already used by the "Zero-shot Customized Video Editing" entry earlier in this file, so this entry was dropped as a repeated entry.