ICCV 2025 Open Access Repository

Papers

Back
Who is a Better Talker: Subjective and Objective Quality Assessment for AI-Generated Talking Heads: Yingjie Zhou,

Jiezhang Cao,

Zicheng Zhang,

Farong Wen,

Yanwei Jiang,

Jun Jia,

Xiaohong Liu,

Xiongkuo Min,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yingjie and Cao, Jiezhang and Zhang, Zicheng and Wen, Farong and Jiang, Yanwei and Jia, Jun and Liu, Xiaohong and Min, Xiongkuo and Zhai, Guangtao},
  title     = {Who is a Better Talker: Subjective and Objective Quality Assessment for {AI}-Generated Talking Heads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12201--12211}
}
LayerAnimate: Layer-level Control for Animation: Yuxue Yang,

Lue Fan,

Zuzeng Lin,

Feng Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuxue and Fan, Lue and Lin, Zuzeng and Wang, Feng and Zhang, Zhaoxiang},
  title     = {{LayerAnimate}: Layer-level Control for Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10865--10874}
}
Towards a Unified Copernicus Foundation Model for Earth Vision: Yi Wang,

Zhitong Xiong,

Chenying Liu,

Adam J. Stewart,

Thomas Dujardin,

Nikolaos Ioannis Bountos,

Angelos Zavras,

Franziska Gerken,

Ioannis Papoutsis,

Laura Leal-Taixé,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yi and Xiong, Zhitong and Liu, Chenying and Stewart, Adam J. and Dujardin, Thomas and Bountos, Nikolaos Ioannis and Zavras, Angelos and Gerken, Franziska and Papoutsis, Ioannis and Leal-Taix{\'e}, Laura and Zhu, Xiao Xiang},
  title     = {Towards a Unified {Copernicus} Foundation Model for {Earth} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9888--9899}
}
Less-to-More Generalization: Unlocking More Controllability by In-Context Generation: Shaojin Wu,

Mengqi Huang,

Wenxu Wu,

Yufeng Cheng,

Fei Ding,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaojin and Huang, Mengqi and Wu, Wenxu and Cheng, Yufeng and Ding, Fei and He, Qian},
  title     = {Less-to-More Generalization: Unlocking More Controllability by In-Context Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18682--18692}
}
How Far are AI-generated Videos from Simulating the 3D Visual World: A Learned 3D Evaluation Approach: Chirui Chang,

Jiahui Liu,

Zhengzhe Liu,

Xiaoyang Lyu,

Yi-Hua Huang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Chirui and Liu, Jiahui and Liu, Zhengzhe and Lyu, Xiaoyang and Huang, Yi-Hua and Tao, Xin and Wan, Pengfei and Zhang, Di and Qi, Xiaojuan},
  title     = {How Far are {AI}-generated Videos from Simulating the {3D} Visual World: A Learned {3D} Evaluation Approach},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10307--10317}
}
Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models: Wei Xu,

Kangjie Chen,

Jiawei Qiu,

Yuyang Zhang,

Run Wang,

Jin Mao,

Tianwei Zhang,

Lina Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wei and Chen, Kangjie and Qiu, Jiawei and Zhang, Yuyang and Wang, Run and Mao, Jin and Zhang, Tianwei and Wang, Lina},
  title     = {Automated Red Teaming for Text-to-Image Models through Feedback-Guided Prompt Iteration with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18575--18584}
}
ZFusion: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior: Alireza Esmaeilzehi,

Hossein Zaredar,

Yapeng Tian,

Laleh Seyyed-Kalantari; [pdf] [supp]
[bibtex]
@InProceedings{Esmaeilzehi_2025_ICCV,
  author    = {Esmaeilzehi, Alireza and Zaredar, Hossein and Tian, Yapeng and Seyyed-Kalantari, Laleh},
  title     = {{ZFusion}: Efficient Deep Compositional Zero-shot Learning for Blind Image Super-Resolution with Generative Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12338--12348}
}
FlexGen: Flexible Multi-View Generation from Text and Image Inputs: Xinli Xu,

Wenhang Ge,

Jiantao Lin,

Jiawei Feng,

Lie Xu,

Hanfeng Zhao,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xinli and Ge, Wenhang and Lin, Jiantao and Feng, Jiawei and Xu, Lie and Zhao, Hanfeng and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{FlexGen}: Flexible Multi-View Generation from Text and Image Inputs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18714--18724}
}
SummDiff: Generative Modeling of Video Summarization with Diffusion: Kwanseok Kim,

Jaehoon Hahm,

Sumin Kim,

Jinhwan Sul,

Byunghak Kim,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Kwanseok and Hahm, Jaehoon and Kim, Sumin and Sul, Jinhwan and Kim, Byunghak and Lee, Joonseok},
  title     = {{SummDiff}: Generative Modeling of Video Summarization with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15096--15106}
}
FlowDPS: Flow-Driven Posterior Sampling for Inverse Problems: Jeongsol Kim,

Bryan Sangwoo Kim,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongsol and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{FlowDPS}: Flow-Driven Posterior Sampling for Inverse Problems},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12328--12337}
}
OminiControl: Minimal and Universal Control for Diffusion Transformer: Zhenxiong Tan,

Songhua Liu,

Xingyi Yang,

Qiaochu Xue,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhenxiong and Liu, Songhua and Yang, Xingyi and Xue, Qiaochu and Wang, Xinchao},
  title     = {{OminiControl}: Minimal and Universal Control for Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14940--14950}
}
AIComposer: Any Style and Content Image Composition via Feature Integration: Haowen Li,

Zhenfeng Fan,

Zhang Wen,

Zhengzhou Zhu,

Yunjin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Haowen and Fan, Zhenfeng and Wen, Zhang and Zhu, Zhengzhou and Li, Yunjin},
  title     = {{AIComposer}: Any Style and Content Image Composition via Feature Integration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16840--16850}
}
M2SFormer: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization: Ju-Hyeon Nam,

Dong-Hyun Moon,

Sang-Chul Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_ICCV,
  author    = {Nam, Ju-Hyeon and Moon, Dong-Hyun and Lee, Sang-Chul},
  title     = {{M2SFormer}: Multi-Spectral and Multi-Scale Attention with Edge-Aware Difficulty Guidance for Image Forgery Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15927--15938}
}
Pinco: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting: Guangben Lu,

Yuzhen Du,

Yizhe Tang,

Zhimin Sun,

Ran Yi,

Yifan Qi,

Tianyi Wang,

Lizhuang Ma,

Fangyuan Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guangben and Du, Yuzhen and Tang, Yizhe and Sun, Zhimin and Yi, Ran and Qi, Yifan and Wang, Tianyi and Ma, Lizhuang and Zou, Fangyuan},
  title     = {{Pinco}: Position-induced Consistent Adapter for Diffusion Transformer in Foreground-conditioned Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15266--15276}
}
SyncDiff: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis: Wenkun He,

Yun Liu,

Ruitao Liu,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Wenkun and Liu, Yun and Liu, Ruitao and Yi, Li},
  title     = {{SyncDiff}: Synchronized Motion Diffusion for Multi-Body Human-Object Interaction Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11731--11743}
}
UnZipLoRA: Separating Content and Style from a Single Image: Chang Liu,

Viraj Shah,

Aiyu Cui,

Svetlana Lazebnik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chang and Shah, Viraj and Cui, Aiyu and Lazebnik, Svetlana},
  title     = {{UnZipLoRA}: Separating Content and Style from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16776--16785}
}
AdaDCP: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization: Qi Bi,

Yixian Shen,

Jingjun Yi,

Gui-Song Xia; [pdf]
[bibtex]
@InProceedings{Bi_2025_ICCV,
  author    = {Bi, Qi and Shen, Yixian and Yi, Jingjun and Xia, Gui-Song},
  title     = {{AdaDCP}: Learning an Adapter with Discrete Cosine Prior for Clear-to-Adverse Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12997--13008}
}
Enhancing Image Restoration Transformer via Adaptive Translation Equivariance: JiaKui Hu,

Zhengjian Yao,

Lujia Jin,

Hangzhou He,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, JiaKui and Yao, Zhengjian and Jin, Lujia and He, Hangzhou and Lu, Yanye},
  title     = {Enhancing Image Restoration Transformer via Adaptive Translation Equivariance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16047--16057}
}
RapVerse: Coherent Vocals and Whole-Body Motion Generation from Text: Jiaben Chen,

Xin Yan,

Yihang Chen,

Siyuan Cen,

Zixin Wang,

Qinwei Ma,

Haoyu Zhen,

Kaizhi Qian,

Lie Lu,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiaben and Yan, Xin and Chen, Yihang and Cen, Siyuan and Wang, Zixin and Ma, Qinwei and Zhen, Haoyu and Qian, Kaizhi and Lu, Lie and Gan, Chuang},
  title     = {{RapVerse}: Coherent Vocals and Whole-Body Motion Generation from Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10097--10107}
}
MoFRR: Mixture of Diffusion Models for Face Retouching Restoration: Jiaxin Liu,

Qichao Ying,

Zhenxing Qian,

Sheng Li,

Runqi Zhang,

Jian Liu,

Xinpeng Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiaxin and Ying, Qichao and Qian, Zhenxing and Li, Sheng and Zhang, Runqi and Liu, Jian and Zhang, Xinpeng},
  title     = {{MoFRR}: Mixture of Diffusion Models for Face Retouching Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12842--12851}
}
UniEgoMotion: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation: Chaitanya Patel,

Hiroki Nakamura,

Yuta Kyuragi,

Kazuki Kozuka,

Juan Carlos Niebles,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Chaitanya and Nakamura, Hiroki and Kyuragi, Yuta and Kozuka, Kazuki and Niebles, Juan Carlos and Adeli, Ehsan},
  title     = {{UniEgoMotion}: A Unified Model for Egocentric Motion Reconstruction, Forecasting, and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10318--10329}
}
SANA-Sprint: One-Step Diffusion with Continuous-Time Consistency Distillation: Junsong Chen,

Shuchen Xue,

Yuyang Zhao,

Jincheng Yu,

Sayak Paul,

Junyu Chen,

Han Cai,

Song Han,

Enze Xie; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Junsong and Xue, Shuchen and Zhao, Yuyang and Yu, Jincheng and Paul, Sayak and Chen, Junyu and Cai, Han and Han, Song and Xie, Enze},
  title     = {{SANA-Sprint}: One-Step Diffusion with Continuous-Time Consistency Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16185--16195}
}
Erasing More Than Intended? How Concept Erasure Degrades the Generation of Non-Target Concepts: Ibtihel Amara,

Ahmed Imtiaz Humayun,

Ivana Kajic,

Zarana Parekh,

Natalie Harris,

Sarah Young,

Chirag Nagpal,

Najoung Kim,

Junfeng He,

Cristina Nader Vasconcelos,

Deepak Ramachandran,

Golnoosh Farnadi,

Katherine Heller,

Mohammad Havaei,

Negar Rostamzadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amara_2025_ICCV,
  author    = {Amara, Ibtihel and Humayun, Ahmed Imtiaz and Kajic, Ivana and Parekh, Zarana and Harris, Natalie and Young, Sarah and Nagpal, Chirag and Kim, Najoung and He, Junfeng and Vasconcelos, Cristina Nader and Ramachandran, Deepak and Farnadi, Golnoosh and Heller, Katherine and Havaei, Mohammad and Rostamzadeh, Negar},
  title     = {Erasing More Than Intended? {How} Concept Erasure Degrades the Generation of Non-Target Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16420--16430}
}
Global and Local Entailment Learning for Natural World Imagery: Srikumar Sastry,

Aayush Dhakal,

Eric Xing,

Subash Khanal,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sastry_2025_ICCV,
  author    = {Sastry, Srikumar and Dhakal, Aayush and Xing, Eric and Khanal, Subash and Jacobs, Nathan},
  title     = {Global and Local Entailment Learning for Natural World Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15770--15780}
}
LVFace: Progressive Cluster Optimization for Large Vision Models in Face Recognition: Jinghan You,

Shanglin Li,

Yuanrui Sun,

Jiangchuan Wei,

Mingyu Guo,

Chao Feng,

Jiao Ran; [pdf] [arXiv]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Jinghan and Li, Shanglin and Sun, Yuanrui and Wei, Jiangchuan and Guo, Mingyu and Feng, Chao and Ran, Jiao},
  title     = {{LVFace}: Progressive Cluster Optimization for Large Vision Models in Face Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11840--11849}
}
E-NeMF: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes: Yan Liu,

Zehao Chen,

Haojie Yan,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yan and Chen, Zehao and Yan, Haojie and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{E-NeMF}: Event-based Neural Motion Field for Novel Space-time View Synthesis of Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10854--10864}
}
Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis: Yanzuo Lu,

Yuxi Ren,

Xin Xia,

Shanchuan Lin,

Xing Wang,

Xuefeng Xiao,

Andy J. Ma,

Xiaohua Xie,

Jian-Huang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yanzuo and Ren, Yuxi and Xia, Xin and Lin, Shanchuan and Wang, Xing and Xiao, Xuefeng and Ma, Andy J. and Xie, Xiaohua and Lai, Jian-Huang},
  title     = {Adversarial Distribution Matching for Diffusion Distillation Towards Efficient Image and Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16818--16829}
}
Switch-a-View: View Selection Learned from Unlabeled In-the-wild Videos: Sagnik Majumder,

Tushar Nagarajan,

Ziad Al-Halah,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Majumder_2025_ICCV,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Grauman, Kristen},
  title     = {{Switch-a-View}: View Selection Learned from Unlabeled In-the-wild Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11969--11979}
}
UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization: Junjie He,

Yifeng Geng,

Liefeng Bo; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Junjie and Geng, Yifeng and Bo, Liefeng},
  title     = {{UniPortrait}: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14399--14408}
}
Exploiting Diffusion Prior for Task-driven Image Restoration: Jaeha Kim,

Junghun Oh,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu},
  title     = {Exploiting Diffusion Prior for Task-driven Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10151--10161}
}
Dual-level Prototype Learning for Composite Degraded Image Restoration: Zhongze Wang,

Haitao Zhao,

Lujian Yao,

Jingchao Peng,

Kaijie Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhongze and Zhao, Haitao and Yao, Lujian and Peng, Jingchao and Zhao, Kaijie},
  title     = {Dual-level Prototype Learning for Composite Degraded Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14006--14016}
}
Forensic-MoE: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts: Mingqi Fang,

Ziguang Li,

Lingyun Yu,

Quanwei Yang,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Mingqi and Li, Ziguang and Yu, Lingyun and Yang, Quanwei and Xie, Hongtao and Zhang, Yongdong},
  title     = {{Forensic-MoE}: Exploring Comprehensive Synthetic Image Detection Traces with Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17772--17782}
}
Robust Adverse Weather Removal via Spectral-based Spatial Grouping: Yuhwan Jeong,

Yunseo Yang,

Youngho Yoon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Yuhwan and Yang, Yunseo and Yoon, Youngho and Yoon, Kuk-Jin},
  title     = {Robust Adverse Weather Removal via Spectral-based Spatial Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11872--11883}
}
SemGes: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning: Lanmiao Liu,

Esam Ghaleb,

Asli Ozyurek,

Zerrin Yumak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lanmiao and Ghaleb, Esam and Ozyurek, Asli and Yumak, Zerrin},
  title     = {{SemGes}: Semantics-aware Co-Speech Gesture Generation using Semantic Coherence and Relevance Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13963--13973}
}
DDB: Diffusion Driven Balancing to Address Spurious Correlations: Aryan Yazdan Parast,

Basim Azam,

Naveed Akhtar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parast_2025_ICCV,
  author    = {Parast, Aryan Yazdan and Azam, Basim and Akhtar, Naveed},
  title     = {{DDB}: Diffusion Driven Balancing to Address Spurious Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17526--17535}
}
MMAIF: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance: Zihan Cao,

Yu Zhong,

Ziqi Wang,

Liang-Jian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Zihan and Zhong, Yu and Wang, Ziqi and Deng, Liang-Jian},
  title     = {{MMAIF}: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11744--11754}
}
MonSTeR: a Unified Model for Motion, Scene, Text Retrieval: Luca Collorone,

Matteo Gioia,

Massimiliano Pappa,

Paolo Leoni,

Giovanni Ficarra,

Or Litany,

Indro Spinelli,

Fabio Galasso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Collorone_2025_ICCV,
  author    = {Collorone, Luca and Gioia, Matteo and Pappa, Massimiliano and Leoni, Paolo and Ficarra, Giovanni and Litany, Or and Spinelli, Indro and Galasso, Fabio},
  title     = {{MonSTeR}: a Unified Model for Motion, Scene, Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10940--10949}
}
Event-Driven Storytelling with Multiple Lifelike Humans in a 3D Scene: Donggeun Lim,

Jinseok Bae,

Inwoo Hwang,

Seungmin Lee,

Hwanhee Lee,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Donggeun and Bae, Jinseok and Hwang, Inwoo and Lee, Seungmin and Lee, Hwanhee and Kim, Young Min},
  title     = {Event-Driven Storytelling with Multiple Lifelike Humans in a {3D} Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11654--11664}
}
Bi-Level Optimization for Self-Supervised AI-Generated Face Detection: Mian Zou,

Nan Zhong,

Baosheng Yu,

Yibing Zhan,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Mian and Zhong, Nan and Yu, Baosheng and Zhan, Yibing and Ma, Kede},
  title     = {Bi-Level Optimization for Self-Supervised {AI}-Generated Face Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18959--18968}
}
Golden Noise for Diffusion Models: A Learning Framework: Zikai Zhou,

Shitong Shao,

Lichen Bai,

Shufei Zhang,

Zhiqiang Xu,

Bo Han,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zikai and Shao, Shitong and Bai, Lichen and Zhang, Shufei and Xu, Zhiqiang and Han, Bo and Xie, Zeke},
  title     = {Golden Noise for Diffusion Models: A Learning Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17688--17697}
}
CharaConsist: Fine-Grained Consistent Character Generation: Mengyu Wang,

Henghui Ding,

Jianing Peng,

Yao Zhao,

Yunpeng Chen,

Yunchao Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengyu and Ding, Henghui and Peng, Jianing and Zhao, Yao and Chen, Yunpeng and Wei, Yunchao},
  title     = {{CharaConsist}: Fine-Grained Consistent Character Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16058--16067}
}
StyleSRN: Scene Text Image Super-Resolution with Text Style Embedding: Shengrong Yuan,

Runmin Wang,

Ke Hao,

Xuqi Ma,

Changxin Gao,

Li Liu,

Nong Sang; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Shengrong and Wang, Runmin and Hao, Ke and Ma, Xuqi and Gao, Changxin and Liu, Li and Sang, Nong},
  title     = {{StyleSRN}: Scene Text Image Super-Resolution with Text Style Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18693--18702}
}
Processing and acquisition traces in visual encoders: What does CLIP know about your camera?: Ryan Ramos,

Vladan Stojnić,

Giorgos Kordopatis-Zilos,

Yuta Nakashima,

Giorgos Tolias,

Noa Garcia; [pdf] [supp]
[bibtex]
@InProceedings{Ramos_2025_ICCV,
  author    = {Ramos, Ryan and Stojni{\'c}, Vladan and Kordopatis-Zilos, Giorgos and Nakashima, Yuta and Tolias, Giorgos and Garcia, Noa},
  title     = {Processing and acquisition traces in visual encoders: What does {CLIP} know about your camera?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17056--17066}
}
Holistic Tokenizer for Autoregressive Image Generation: Anlin Zheng,

Haochen Wang,

Yucheng Zhao,

Weipeng Deng,

Tiancai Wang,

Xiangyu Zhang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Anlin and Wang, Haochen and Zhao, Yucheng and Deng, Weipeng and Wang, Tiancai and Zhang, Xiangyu and Qi, Xiaojuan},
  title     = {Holistic Tokenizer for Autoregressive Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16916--16926}
}
VLABench: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks: Shiduo Zhang,

Zhe Xu,

Peiju Liu,

Xiaopeng Yu,

Yuan Li,

Qinghui Gao,

Zhaoye Fei,

Zhangyue Yin,

Zuxuan Wu,

Yu-Gang Jiang,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiduo and Xu, Zhe and Liu, Peiju and Yu, Xiaopeng and Li, Yuan and Gao, Qinghui and Fei, Zhaoye and Yin, Zhangyue and Wu, Zuxuan and Jiang, Yu-Gang and Qiu, Xipeng},
  title     = {{VLABench}: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11142--11152}
}
Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation: Jiahua Dong,

Hui Yin,

Wenqi Liang,

Hanbin Zhao,

Henghui Ding,

Nicu Sebe,

Salman Khan,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Jiahua and Yin, Hui and Liang, Wenqi and Zhao, Hanbin and Ding, Henghui and Sebe, Nicu and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {Hierarchical Visual Prompt Learning for Continual Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11829--11839}
}
MorphoGen: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework: Tianfang Zhu,

Hongyang Zhou,

Anan Li; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Tianfang and Zhou, Hongyang and Li, Anan},
  title     = {{MorphoGen}: Efficient Unconditional Generation of Long-Range Projection Neuronal Morphology via a Global-to-Local Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13021--13031}
}
REDUCIO! Generating 1K Video within 16 Seconds using Extremely Compressed Motion Latents: Rui Tian,

Qi Dai,

Jianmin Bao,

Kai Qiu,

Yifan Yang,

Chong Luo,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Rui and Dai, Qi and Bao, Jianmin and Qiu, Kai and Yang, Yifan and Luo, Chong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{REDUCIO}! {Generating} {1K} Video within 16 Seconds using Extremely Compressed Motion Latents},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19237--19247}
}
Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal: Wanchang Yu,

Qing Zhang,

Rongjia Zheng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wanchang and Zhang, Qing and Zheng, Rongjia and Zheng, Wei-Shi},
  title     = {Structure-Guided Diffusion Models for High-Fidelity Portrait Shadow Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11675--11684}
}
UniRes: Universal Image Restoration for Complex Degradations: Mo Zhou,

Keren Ye,

Mauricio Delbracio,

Peyman Milanfar,

Vishal M. Patel,

Hossein Talebi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Mo and Ye, Keren and Delbracio, Mauricio and Milanfar, Peyman and Patel, Vishal M. and Talebi, Hossein},
  title     = {{UniRes}: Universal Image Restoration for Complex Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13237--13247}
}
Your Text Encoder Can Be An Object-Level Watermarking Controller: Naresh Kumar Devulapally,

Mingzhen Huang,

Vishal Asnani,

Shruti Agarwal,

Siwei Lyu,

Vishnu Suresh Lokhande; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Devulapally_2025_ICCV,
  author    = {Devulapally, Naresh Kumar and Huang, Mingzhen and Asnani, Vishal and Agarwal, Shruti and Lyu, Siwei and Lokhande, Vishnu Suresh},
  title     = {Your Text Encoder Can Be An Object-Level Watermarking Controller},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16576--16585}
}
Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking: Yunhao Li,

Yifan Jiao,

Dan Meng,

Heng Fan,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yunhao and Jiao, Yifan and Meng, Dan and Fan, Heng and Zhang, Libo},
  title     = {Attention to Trajectory: Trajectory-Aware Open-Vocabulary Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14390--14398}
}
Lay2Story: Extending Diffusion Transformers for Layout-Togglable Story Generation: Ao Ma,

Jiasong Feng,

Ke Cao,

Jing Wang,

Yun Wang,

Quanwei Zhang,

Zhanjie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Ao and Feng, Jiasong and Cao, Ke and Wang, Jing and Wang, Yun and Zhang, Quanwei and Zhang, Zhanjie},
  title     = {{Lay2Story}: Extending Diffusion Transformers for Layout-Togglable Story Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16102--16111}
}
Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening: Shijie Fang,

Hongping Gan; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Shijie and Gan, Hongping},
  title     = {Unfolding-Associative Encoder-Decoder Network with Progressive Alignment for Pansharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13651--13661}
}
DeepMesh: Auto-Regressive Artist-mesh Creation with Reinforcement Learning: Ruowen Zhao,

Junliang Ye,

Zhengyi Wang,

Guangce Liu,

Yiwen Chen,

Yikai Wang,

Jun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Ruowen and Ye, Junliang and Wang, Zhengyi and Liu, Guangce and Chen, Yiwen and Wang, Yikai and Zhu, Jun},
  title     = {{DeepMesh}: Auto-Regressive Artist-mesh Creation with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10612--10623}
}
PINO: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups: Sakuya Ota,

Qing Yu,

Kent Fujiwara,

Satoshi Ikehata,

Ikuro Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ota_2025_ICCV,
  author    = {Ota, Sakuya and Yu, Qing and Fujiwara, Kent and Ikehata, Satoshi and Sato, Ikuro},
  title     = {{PINO}: Person-Interaction Noise Optimization for Long-Duration and Customizable Motion Generation of Arbitrary-Sized Groups},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10676--10685}
}
Fast Image Super-Resolution via Consistency Rectified Flow: Jiaqi Xu,

Wenbo Li,

Haoze Sun,

Fan Li,

Zhixin Wang,

Long Peng,

Jingjing Ren,

Haoran Yang,

Xiaowei Hu,

Renjing Pei,

Pheng-Ann Heng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiaqi and Li, Wenbo and Sun, Haoze and Li, Fan and Wang, Zhixin and Peng, Long and Ren, Jingjing and Yang, Haoran and Hu, Xiaowei and Pei, Renjing and Heng, Pheng-Ann},
  title     = {Fast Image Super-Resolution via Consistency Rectified Flow},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11755--11765}
}
Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On: Delong Zhang,

Qiwei Huang,

Yang Sun,

Yuanliu Liu,

Wei-Shi Zheng,

Pengfei Xiong,

Wei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Delong and Huang, Qiwei and Sun, Yang and Liu, Yuanliu and Zheng, Wei-Shi and Xiong, Pengfei and Zhang, Wei},
  title     = {Learning Implicit Features with Flow-Infused Transformations for Realistic Virtual Try-On},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18736--18745}
}
FineMotion: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing: Bizhu Wu,

Jinheng Xie,

Meidan Ding,

Zhe Kong,

Jianfeng Ren,

Ruibin Bai,

Rong Qu,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Bizhu and Xie, Jinheng and Ding, Meidan and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin},
  title     = {{FineMotion}: A Dataset and Benchmark with both Spatial and Temporal Annotation for Fine-grained Motion Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13837--13846}
}
MC-Bench: A Benchmark for Multi-Context Visual Grounding in the Era of MLLMs: Yunqiu Xu,

Linchao Zhu,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yunqiu and Zhu, Linchao and Yang, Yi},
  title     = {{MC-Bench}: A Benchmark for Multi-Context Visual Grounding in the Era of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17675--17687}
}
What Makes for Text to 360-degree Panorama Generation with Stable Diffusion?: Jinhong Ni,

Chang-Bin Zhang,

Qiang Zhang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Jinhong and Zhang, Chang-Bin and Zhang, Qiang and Zhang, Jing},
  title     = {What Makes for Text to 360-degree Panorama Generation with {Stable Diffusion}?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16555--16564}
}
CHORDS: Diffusion Sampling Accelerator with Multi-core Hierarchical ODE Solvers: Jiaqi Han,

Haotian Ye,

Puheng Li,

Minkai Xu,

James Zou,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Jiaqi and Ye, Haotian and Li, Puheng and Xu, Minkai and Zou, James and Ermon, Stefano},
  title     = {{CHORDS}: Diffusion Sampling Accelerator with Multi-core Hierarchical {ODE} Solvers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19386--19395}
}
Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer: Qingyu Shi,

Jianzong Wu,

Jinbin Bai,

Jiangning Zhang,

Lu Qi,

Yunhai Tong,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Qingyu and Wu, Jianzong and Bai, Jinbin and Zhang, Jiangning and Qi, Lu and Tong, Yunhai and Li, Xiangtai},
  title     = {Decouple and Track: Benchmarking and Improving Video Diffusion Transformers For Motion Transfer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10995--11005}
}
MaskControl: Spatio-Temporal Control for Masked Motion Synthesis: Ekkasit Pinyoanuntapong,

Muhammad Saleem,

Korrawe Karunratanakul,

Pu Wang,

Hongfei Xue,

Chen Chen,

Chuan Guo,

Junli Cao,

Jian Ren,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pinyoanuntapong_2025_ICCV,
  author    = {Pinyoanuntapong, Ekkasit and Saleem, Muhammad and Karunratanakul, Korrawe and Wang, Pu and Xue, Hongfei and Chen, Chen and Guo, Chuan and Cao, Junli and Ren, Jian and Tulyakov, Sergey},
  title     = {{MaskControl}: Spatio-Temporal Control for Masked Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9955--9965}
}
Straighten Viscous Rectified Flow via Noise Optimization: Jimin Dai,

Jiexi Yan,

Jian Yang,

Lei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Jimin and Yan, Jiexi and Yang, Jian and Luo, Lei},
  title     = {Straighten Viscous Rectified Flow via Noise Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15005--15014}
}
ImHead: A Large-scale Implicit Morphable Model for Localized Head Modeling: Rolandos Alexandros Potamias,

Stathis Galanakis,

Jiankang Deng,

Athanasios Papaioannou,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Potamias_2025_ICCV,
  author    = {Potamias, Rolandos Alexandros and Galanakis, Stathis and Deng, Jiankang and Papaioannou, Athanasios and Zafeiriou, Stefanos},
  title     = {{ImHead}: A Large-scale Implicit Morphable Model for Localized Head Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10196--10206}
}
Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset: Ruofei Wang,

Peiqi Duan,

Boxin Shi,

Renjie Wan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruofei and Duan, Peiqi and Shi, Boxin and Wan, Renjie},
  title     = {Asynchronous Event Error-Minimizing Noise for Safeguarding Event Dataset},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10141--10150}
}
Efficient Concertormer for Image Deblurring and Beyond: Pin-Hung Kuo,

Jinshan Pan,

Shao-Yi Chien,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuo_2025_ICCV,
  author    = {Kuo, Pin-Hung and Pan, Jinshan and Chien, Shao-Yi and Yang, Ming-Hsuan},
  title     = {Efficient {Concertormer} for Image Deblurring and Beyond},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14665--14675}
}
DynamicID: Zero-Shot Multi-ID Image Personalization with Flexible Facial Editability: Xirui Hu,

Jiahao Wang,

Hao Chen,

Weizhan Zhang,

Benqi Wang,

Yikun Li,

Haishun Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xirui and Wang, Jiahao and Chen, Hao and Zhang, Weizhan and Wang, Benqi and Li, Yikun and Nan, Haishun},
  title     = {{DynamicID}: Zero-Shot {Multi-ID} Image Personalization with Flexible Facial Editability},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10549--10559}
}
Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks: Hao Huang,

Shuaihang Yuan,

Geeta Chandra Raju Bethala,

Congcong Wen,

Anthony Tzes,

Yi Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hao and Yuan, Shuaihang and Bethala, Geeta Chandra Raju and Wen, Congcong and Tzes, Anthony and Fang, Yi},
  title     = {Wavelet Policy: Lifting Scheme for Policy Learning in Long-Horizon Tasks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12349--12359}
}
Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation: Xiaolong Xu,

Lei Zhang,

Jiayi Li,

Lituan Wang,

Yifan Guan,

Yu Yan,

Leyi Zhang,

Hao Song; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiaolong and Zhang, Lei and Li, Jiayi and Wang, Lituan and Guan, Yifan and Yan, Yu and Zhang, Leyi and Song, Hao},
  title     = {Dual-Temporal Exemplar Representation Network for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10775--10785}
}
Denoising Token Prediction in Masked Autoregressive Models: Ting Yao,

Yehao Li,

Yingwei Pan,

Zhaofan Qiu,

Tao Mei; [pdf]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Ting and Li, Yehao and Pan, Yingwei and Qiu, Zhaofan and Mei, Tao},
  title     = {Denoising Token Prediction in Masked Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18024--18033}
}
BadVideo: Stealthy Backdoor Attack against Text-to-Video Generation: Ruotong Wang,

Mingli Zhu,

Jiarong Ou,

Rui Chen,

Xin Tao,

Pengfei Wan,

Baoyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruotong and Zhu, Mingli and Ou, Jiarong and Chen, Rui and Tao, Xin and Wan, Pengfei and Wu, Baoyuan},
  title     = {{BadVideo}: Stealthy Backdoor Attack against Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19075--19084}
}
Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression: Shiyu Qin,

Jinpeng Wang,

Yimin Zhou,

Bin Chen,

Tianci Luo,

Baoyi An,

Tao Dai,

Shu-Tao Xia,

Yaowei Wang; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Shiyu and Wang, Jinpeng and Zhou, Yimin and Chen, Bin and Luo, Tianci and An, Baoyi and Dai, Tao and Xia, Shu-Tao and Wang, Yaowei},
  title     = {Cassic: Towards Content-Adaptive State-Space Models for Learned Image Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15727--15736}
}
AIGI-Holmes: Towards Explainable and Generalizable AI-Generated Image Detection via Multimodal Large Language Models: Ziyin Zhou,

Yunpeng Luo,

Yuanchen Wu,

Ke Sun,

Jiayi Ji,

Ke Yan,

Shouhong Ding,

Xiaoshuai Sun,

Yunsheng Wu,

Rongrong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Ziyin and Luo, Yunpeng and Wu, Yuanchen and Sun, Ke and Ji, Jiayi and Yan, Ke and Ding, Shouhong and Sun, Xiaoshuai and Wu, Yunsheng and Ji, Rongrong},
  title     = {{AIGI-Holmes}: Towards Explainable and Generalizable {AI-Generated} Image Detection via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18746--18758}
}
TokensGen: Harnessing Condensed Tokens for Long Video Generation: Wenqi Ouyang,

Zeqi Xiao,

Danni Yang,

Yifan Zhou,

Shuai Yang,

Lei Yang,

Jianlou Si,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Wenqi and Xiao, Zeqi and Yang, Danni and Zhou, Yifan and Yang, Shuai and Yang, Lei and Si, Jianlou and Pan, Xingang},
  title     = {{TokensGen}: Harnessing Condensed Tokens for Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18197--18206}
}
Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion: Haoyang Chen,

Dongfang Sun,

Caoyuan Ma,

Shiqin Wang,

Kewei Zhang,

Zheng Wang,

Zhixiang Wang; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Haoyang and Sun, Dongfang and Ma, Caoyuan and Wang, Shiqin and Zhang, Kewei and Wang, Zheng and Wang, Zhixiang},
  title     = {Subjective Camera 1.0: Bridging Human Cognition and Visual Reconstruction through Sequence-Aware Sketch-Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17838--17847}
}
Learning Robust Image Watermarking with Lossless Cover Recovery: Jiale Chen,

Wei Wang,

Chongyang Shi,

Li Dong,

Xiping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiale and Wang, Wei and Shi, Chongyang and Dong, Li and Hu, Xiping},
  title     = {Learning Robust Image Watermarking with Lossless Cover Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15056--15065}
}
Drawing Developmental Trajectory from Cortical Surface Reconstruction: Wenxuan Wu,

Ruowen Qu,

Zhongliang Liu,

Zhuoyan Dai,

Dongzi Shi,

Sijin Yu,

Tong Xiong,

Shiping Liu,

Xiangmin Xu,

Xiaofen Xing,

Xin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Wenxuan and Qu, Ruowen and Liu, Zhongliang and Dai, Zhuoyan and Shi, Dongzi and Yu, Sijin and Xiong, Tong and Liu, Shiping and Xu, Xiangmin and Xing, Xiaofen and Zhang, Xin},
  title     = {Drawing Developmental Trajectory from Cortical Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11026--11035}
}
DGTalker: Disentangled Generative Latent Space Learning for Audio-Driven Gaussian Talking Heads: Xiaoxi Liang,

Yanbo Fan,

Qiya Yang,

Xuan Wang,

Wei Gao,

Ge Li; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiaoxi and Fan, Yanbo and Yang, Qiya and Wang, Xuan and Gao, Wei and Li, Ge},
  title     = {{DGTalker}: Disentangled Generative Latent Space Learning for Audio-Driven {Gaussian} Talking Heads},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11079--11088}
}
EDiT: Efficient Diffusion Transformers with Linear Compressed Attention: Philipp Becker,

Abhinav Mehrotra,

Ruchika Chavhan,

Malcolm Chadwick,

Luca Morreale,

Mehdi Noroozi,

Alberto Gil C. P. Ramos,

Sourav Bhattacharya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Becker_2025_ICCV,
  author    = {Becker, Philipp and Mehrotra, Abhinav and Chavhan, Ruchika and Chadwick, Malcolm and Morreale, Luca and Noroozi, Mehdi and Gil C. P. Ramos, Alberto and Bhattacharya, Sourav},
  title     = {{EDiT}: Efficient Diffusion Transformers with Linear Compressed Attention},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19608--19616}
}
When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack: Hanqing Liu,

Shouwei Ruan,

Yao Huang,

Shiji Zhao,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Hanqing and Ruan, Shouwei and Huang, Yao and Zhao, Shiji and Wei, Xingxing},
  title     = {When Lighting Deceives: Exposing Vision-Language Models' Illumination Vulnerability Through Illumination Transformation Attack},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10485--10495}
}
DIA: The Adversarial Exposure of Deterministic Inversion in Diffusion Models: Seunghoo Hong,

Geonho Son,

Juhun Lee,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Seunghoo and Son, Geonho and Lee, Juhun and Woo, Simon S.},
  title     = {{DIA}: The Adversarial Exposure of Deterministic Inversion in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17994--18003}
}
HouseTour: A Virtual Real Estate A(I)gent: Ata Çelen,

Marc Pollefeys,

Daniel Barath,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Celen_2025_ICCV,
  author    = {{\c{C}}elen, Ata and Pollefeys, Marc and Barath, Daniel and Armeni, Iro},
  title     = {{HouseTour}: A Virtual Real Estate {A(I)gent}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17761--17771}
}
Colors See Colors Ignore: Clothes Changing ReID with Color Disentanglement: Priyank Pathak,

Yogesh S. Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2025_ICCV,
  author    = {Pathak, Priyank and Rawat, Yogesh S.},
  title     = {Colors See Colors Ignore: Clothes Changing {ReID} with Color Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16797--16807}
}
AnyI2V: Animating Any Conditional Image with Motion Control: Ziye Li,

Hao Luo,

Xincheng Shuai,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ziye and Luo, Hao and Shuai, Xincheng and Ding, Henghui},
  title     = {{AnyI2V}: Animating Any Conditional Image with Motion Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17302--17311}
}
GENMO: A GENeralist Model for Human MOtion: Jiefeng Li,

Jinkun Cao,

Haotian Zhang,

Davis Rempe,

Jan Kautz,

Umar Iqbal,

Ye Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiefeng and Cao, Jinkun and Zhang, Haotian and Rempe, Davis and Kautz, Jan and Iqbal, Umar and Yuan, Ye},
  title     = {{GENMO}: A {GENeralist} Model for Human {MOtion}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11766--11776}
}
Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning: Yafei Zhang,

Lingqi Kong,

Huafeng Li,

Jie Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yafei and Kong, Lingqi and Li, Huafeng and Wen, Jie},
  title     = {Weakly Supervised Visible-Infrared Person Re-Identification via Heterogeneous Expert Collaborative Consistency Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12659--12669}
}
Prompt-A-Video: Prompt Your Video Diffusion Model via Preference-Aligned LLM: Yatai Ji,

Jiacheng Zhang,

Jie Wu,

Shilong Zhang,

Shoufa Chen,

Chongjian Ge,

Peize Sun,

Weifeng Chen,

Wenqi Shao,

Xuefeng Xiao,

Weilin Huang,

Ping Luo; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yatai and Zhang, Jiacheng and Wu, Jie and Zhang, Shilong and Chen, Shoufa and Ge, Chongjian and Sun, Peize and Chen, Weifeng and Shao, Wenqi and Xiao, Xuefeng and Huang, Weilin and Luo, Ping},
  title     = {{Prompt-A-Video}: Prompt Your Video Diffusion Model via Preference-Aligned {LLM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18725--18735}
}
Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization: Thomas Carr,

Depeng Xu,

Shuhan Yuan,

Aidong Lu; [pdf] [supp]
[bibtex]
@InProceedings{Carr_2025_ICCV,
  author    = {Carr, Thomas and Xu, Depeng and Yuan, Shuhan and Lu, Aidong},
  title     = {Privacy-centric Deep Motion Retargeting for Anonymization of Skeleton-Based Motion Visualization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13162--13170}
}
MeshAnything V2: Artist-Created Mesh Generation with Adjacent Mesh Tokenization: Yiwen Chen,

Yikai Wang,

Yihao Luo,

Zhengyi Wang,

Zilong Chen,

Jun Zhu,

Chi Zhang,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiwen and Wang, Yikai and Luo, Yihao and Wang, Zhengyi and Chen, Zilong and Zhu, Jun and Zhang, Chi and Lin, Guosheng},
  title     = {{MeshAnything} {V2}: Artist-Created Mesh Generation with Adjacent Mesh Tokenization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13922--13931}
}
DynFaceRestore: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance: Huu-Phu Do,

Yu-Wei Chen,

Yi-Cheng Liao,

Chi-Wei Hsiao,

Han-Yang Wang,

Wei-Chen Chiu,

Ching-Chun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2025_ICCV,
  author    = {Do, Huu-Phu and Chen, Yu-Wei and Liao, Yi-Cheng and Hsiao, Chi-Wei and Wang, Han-Yang and Chiu, Wei-Chen and Huang, Ching-Chun},
  title     = {{DynFaceRestore}: Balancing Fidelity and Quality in Diffusion-Guided Blind Face Restoration with Dynamic Blur-Level Mapping and Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10432--10441}
}
LoRAverse: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models: Mert Sonmezer,

Matthew Zheng,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Sonmezer_2025_ICCV,
  author    = {Sonmezer, Mert and Zheng, Matthew and Yanardag, Pinar},
  title     = {{LoRAverse}: A Submodular Framework to Retrieve Diverse Adapters for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17879--17888}
}
CombatVLA: An Efficient Vision-Language-Action Model for Combat Tasks in 3D Action Role-Playing Games: Peng Chen,

Pi Bu,

Yingyao Wang,

Xinyi Wang,

Ziming Wang,

Jie Guo,

Yingxiu Zhao,

Qi Zhu,

Jun Song,

Siran Yang,

Jiamang Wang,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Peng and Bu, Pi and Wang, Yingyao and Wang, Xinyi and Wang, Ziming and Guo, Jie and Zhao, Yingxiu and Zhu, Qi and Song, Jun and Yang, Siran and Wang, Jiamang and Zheng, Bo},
  title     = {{CombatVLA}: An Efficient Vision-Language-Action Model for Combat Tasks in {3D} Action Role-Playing Games},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10919--10928}
}
Benchmarking and Learning Multi-Dimensional Quality Evaluator for Text-to-3D Generation: Yujie Zhang,

Bingyang Cui,

Qi Yang,

Zhu Li,

Yiling Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yujie and Cui, Bingyang and Yang, Qi and Li, Zhu and Xu, Yiling},
  title     = {Benchmarking and Learning Multi-Dimensional Quality Evaluator for {Text-to-3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18563--18574}
}
DreamDance: Animating Human Images by Enriching 3D Geometry Cues from 2D Poses: Yatian Pang,

Bin Zhu,

Bin Lin,

Mingzhe Zheng,

Francis E. H. Tay,

Ser-Nam Lim,

Harry Yang,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_ICCV,
  author    = {Pang, Yatian and Zhu, Bin and Lin, Bin and Zheng, Mingzhe and Tay, Francis E. H. and Lim, Ser-Nam and Yang, Harry and Yuan, Li},
  title     = {{DreamDance}: Animating Human Images by Enriching {3D} Geometry Cues from {2D} Poses},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14039--14050}
}
MBTI: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation: Jungwoo Huh,

Yeseung Park,

Seongjean Kim,

Jungsu Kim,

Sanghoon Lee; [pdf] [supp]
[bibtex]
@InProceedings{Huh_2025_ICCV,
  author    = {Huh, Jungwoo and Park, Yeseung and Kim, Seongjean and Kim, Jungsu and Lee, Sanghoon},
  title     = {{MBTI}: Masked Blending Transformers with Implicit Positional Encoding for Frame-rate Agnostic Motion Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11568--11578}
}
p-AVAS: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?: Susan Liang,

Chao Huang,

Yunlong Tang,

Zeliang Zhang,

Chenliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Susan and Huang, Chao and Tang, Yunlong and Zhang, Zeliang and Xu, Chenliang},
  title     = {{p-AVAS}: Can Physics-Integrated Audio-Visual Modeling Boost Neural Acoustic Synthesis?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13942--13951}
}
Learning Precise Affordances from Egocentric Videos for Robotic Manipulation: Gen Li,

Nikolaos Tsagkas,

Jifei Song,

Ruaridh Mon-Williams,

Sethu Vijayakumar,

Kun Shao,

Laura Sevilla-Lara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Tsagkas, Nikolaos and Song, Jifei and Mon-Williams, Ruaridh and Vijayakumar, Sethu and Shao, Kun and Sevilla-Lara, Laura},
  title     = {Learning Precise Affordances from Egocentric Videos for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10581--10591}
}
VAFlow: Video-to-Audio Generation with Cross-Modality Flow Matching: Xihua Wang,

Xin Cheng,

Yuyue Wang,

Ruihua Song,

Yunfeng Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xihua and Cheng, Xin and Wang, Yuyue and Song, Ruihua and Wang, Yunfeng},
  title     = {{VAFlow}: Video-to-Audio Generation with Cross-Modality Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11777--11786}
}
HDR Image Generation via Gain Map Decomposed Diffusion: Yuanshen Guan,

Ruikang Xu,

Yinuo Liao,

Mingde Yao,

Lizhi Wang,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yuanshen and Xu, Ruikang and Liao, Yinuo and Yao, Mingde and Wang, Lizhi and Xiong, Zhiwei},
  title     = {{HDR} Image Generation via Gain Map Decomposed Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17536--17545}
}
MoGA: 3D Generative Avatar Prior for Monocular Gaussian Avatar Reconstruction: Zijian Dong,

Longteng Duan,

Jie Song,

Michael J. Black,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Zijian and Duan, Longteng and Song, Jie and Black, Michael J. and Geiger, Andreas},
  title     = {{MoGA}: {3D} Generative Avatar Prior for Monocular {Gaussian} Avatar Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13304--13314}
}
Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion: Jiwon Kim,

Pureum Kim,

SeonHwa Kim,

Soobin Park,

Eunju Cha,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jiwon and Kim, Pureum and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan},
  title     = {Dual Recursive Feedback on Generation and Appearance Latents for Pose-Robust Text-to-Image Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15491--15500}
}
EgoM2P: Egocentric Multimodal Multitask Pretraining: Gen Li,

Yutong Chen,

Yiqian Wu,

Kaifeng Zhao,

Marc Pollefeys,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Chen, Yutong and Wu, Yiqian and Zhao, Kaifeng and Pollefeys, Marc and Tang, Siyu},
  title     = {{EgoM2P}: Egocentric Multimodal Multitask Pretraining},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10830--10843}
}
On Large Multimodal Models as Open-World Image Classifiers: Alessandro Conti,

Massimiliano Mancini,

Enrico Fini,

Yiming Wang,

Paolo Rota,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Conti_2025_ICCV,
  author    = {Conti, Alessandro and Mancini, Massimiliano and Fini, Enrico and Wang, Yiming and Rota, Paolo and Ricci, Elisa},
  title     = {On Large Multimodal Models as Open-World Image Classifiers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16388--16398}
}
Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections: Youwei Zhou,

Tianyang Xu,

Cong Wu,

Xiaojun Wu,

Josef Kittler; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Youwei and Xu, Tianyang and Wu, Cong and Wu, Xiaojun and Kittler, Josef},
  title     = {Adaptive Hyper-Graph Convolution Network for Skeleton-based Human Action Recognition with Virtual Connections},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12648--12658}
}
General Compression Framework for Efficient Transformer Object Tracking: Lingyi Hong,

Jinglun Li,

Xinyu Zhou,

Shilin Yan,

Pinxue Guo,

Kaixun Jiang,

Zhaoyu Chen,

Shuyong Gao,

Runze Li,

Xingdong Sheng,

Wei Zhang,

Hong Lu,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Lingyi and Li, Jinglun and Zhou, Xinyu and Yan, Shilin and Guo, Pinxue and Jiang, Kaixun and Chen, Zhaoyu and Gao, Shuyong and Li, Runze and Sheng, Xingdong and Zhang, Wei and Lu, Hong and Zhang, Wenqiang},
  title     = {General Compression Framework for Efficient Transformer Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13427--13437}
}
A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions: Youliang Zhang,

Ronghui Li,

Yachao Zhang,

Liang Pan,

Jingbo Wang,

Yebin Liu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Youliang and Li, Ronghui and Zhang, Yachao and Pan, Liang and Wang, Jingbo and Liu, Yebin and Li, Xiu},
  title     = {A Plug-and-Play Physical Motion Restoration Approach for In-the-Wild High-Difficulty Motions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13281--13292}
}
Monocular Facial Appearance Capture in the Wild: Yingyan Xu,

Kate Gadola,

Prashanth Chandran,

Sebastian Weiss,

Markus Gross,

Gaspard Zoss,

Derek Bradley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yingyan and Gadola, Kate and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek},
  title     = {Monocular Facial Appearance Capture in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12078--12088},
}
Gaussian Variation Field Diffusion for High-fidelity Video-to-4D Synthesis: Bowen Zhang,

Sicheng Xu,

Chuxin Wang,

Jiaolong Yang,

Feng Zhao,

Dong Chen,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Bowen and Xu, Sicheng and Wang, Chuxin and Yang, Jiaolong and Zhao, Feng and Chen, Dong and Guo, Baining},
  title     = {{Gaussian} Variation Field Diffusion for High-fidelity Video-to-{4D} Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12502--12513},
}
EmotiCrafter: Text-to-Emotional-Image Generation based on Valence-Arousal Model: Shengqi Dang,

Yi He,

Long Ling,

Ziqing Qian,

Nanxuan Zhao,

Nan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_ICCV,
  author    = {Dang, Shengqi and He, Yi and Ling, Long and Qian, Ziqing and Zhao, Nanxuan and Cao, Nan},
  title     = {{EmotiCrafter}: Text-to-Emotional-Image Generation based on Valence-Arousal Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15218--15228},
}
EAMamba: Efficient All-Around Vision State Space Model for Image Restoration: Yu-Cheng Lin,

Yu-Syuan Xu,

Hao-Wei Chen,

Hsien-Kai Kuo,

Chun-Yi Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Yu-Cheng and Xu, Yu-Syuan and Chen, Hao-Wei and Kuo, Hsien-Kai and Lee, Chun-Yi},
  title     = {{EAMamba}: Efficient All-Around Vision State Space Model for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11708--11719},
}
Learning Hierarchical Line Buffer for Image Processing: Jiacheng Li,

Feiran Li,

Daisuke Iso; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiacheng and Li, Feiran and Iso, Daisuke},
  title     = {Learning Hierarchical Line Buffer for Image Processing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11132--11141},
}
Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors: Zheyuan Zhang,

Weihao Tang,

Hong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zheyuan and Tang, Weihao and Chen, Hong},
  title     = {Rethinking Key-frame-based Micro-expression Recognition: A Robust and Accurate Framework Against Key-frame Errors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12274--12283},
}
WIR3D: Visually-Informed and Geometry-Aware 3D Shape Abstraction: Richard Liu,

Daniel Fu,

Noah Tan,

Itai Lang,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Richard and Fu, Daniel and Tan, Noah and Lang, Itai and Hanocka, Rana},
  title     = {{WIR3D}: Visually-Informed and Geometry-Aware {3D} Shape Abstraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14810--14821},
}
FaceCraft4D: Animated 3D Facial Avatar Generation from a Single Image: Fei Yin,

Mallikarjun B R,

Chun-Han Yao,

Rafal K. Mantiuk,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Fei and R, Mallikarjun B and Yao, Chun-Han and Mantiuk, Rafal K. and Jampani, Varun},
  title     = {{FaceCraft4D}: Animated {3D} Facial Avatar Generation from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11612--11621},
}
Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!: Zihang Zou,

Boqing Gong,

Liqiang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Zihang and Gong, Boqing and Wang, Liqiang},
  title     = {Attention to Neural Plagiarism: Diffusion Models Can Plagiarize Your Copyrighted Images!},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19546--19556},
}
TRCE: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models: Ruidong Chen,

Honglin Guo,

Lanjun Wang,

Chenyu Zhang,

Weizhi Nie,

An-An Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Ruidong and Guo, Honglin and Wang, Lanjun and Zhang, Chenyu and Nie, Weizhi and Liu, An-An},
  title     = {{TRCE}: Towards Reliable Malicious Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18927--18936},
}
MV-Adapter: Multi-View Consistent Image Generation Made Easy: Zehuan Huang,

Yuan-Chen Guo,

Haoran Wang,

Ran Yi,

Lizhuang Ma,

Yan-Pei Cao,

Lu Sheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Zehuan and Guo, Yuan-Chen and Wang, Haoran and Yi, Ran and Ma, Lizhuang and Cao, Yan-Pei and Sheng, Lu},
  title     = {{MV-Adapter}: Multi-View Consistent Image Generation Made Easy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16377--16387},
}
Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment: Ying Ba,

Tianyu Zhang,

Yalong Bai,

Wenyi Mo,

Tao Liang,

Bing Su,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ba_2025_ICCV,
  author    = {Ba, Ying and Zhang, Tianyu and Bai, Yalong and Mo, Wenyi and Liang, Tao and Su, Bing and Wen, Ji-Rong},
  title     = {Enhancing Reward Models for High-quality Image Generation: Beyond Text-Image Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19022--19031},
}
LD-RPS: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling: Huaqiu Li,

Yong Wang,

Tongwen Huang,

Hailang Huang,

Haoqian Wang,

Xiangxiang Chu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Huaqiu and Wang, Yong and Huang, Tongwen and Huang, Hailang and Wang, Haoqian and Chu, Xiangxiang},
  title     = {{LD-RPS}: Zero-Shot Unified Image Restoration via Latent Diffusion Recurrent Posterior Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13684--13694},
}
Outlier-Aware Post-Training Quantization for Image Super-Resolution: Hailing Wang,

Jianglin Lu,

Yitian Zhang,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hailing and Lu, Jianglin and Zhang, Yitian and Fu, Yun},
  title     = {Outlier-Aware Post-Training Quantization for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16175--16184},
}
The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation: Ho Kei Cheng,

Alexander Schwing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Ho Kei and Schwing, Alexander},
  title     = {The Curse of Conditions: Analyzing and Improving Optimal Transport for Conditional Flow-Based Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15875--15884},
}
Less is More: Improving Motion Diffusion Models with Sparse Keyframes: Jinseok Bae,

Inwoo Hwang,

Young-Yoon Lee,

Ziyu Guo,

Joseph Liu,

Yizhak Ben-Shabat,

Young Min Kim,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2025_ICCV,
  author    = {Bae, Jinseok and Hwang, Inwoo and Lee, Young-Yoon and Guo, Ziyu and Liu, Joseph and Ben-Shabat, Yizhak and Kim, Young Min and Kapadia, Mubbasir},
  title     = {Less is More: Improving Motion Diffusion Models with Sparse Keyframes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11069--11078},
}
LeanVAE: An Ultra-Efficient Reconstruction VAE for Video Diffusion Models: Yu Cheng,

Fajie Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Yu and Yuan, Fajie},
  title     = {{LeanVAE}: An Ultra-Efficient Reconstruction {VAE} for Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15692--15702},
}
Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising: Sébastien Herbreteau,

Michael Unser; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herbreteau_2025_ICCV,
  author    = {Herbreteau, S{\'e}bastien and Unser, Michael},
  title     = {Self-Calibrated Variance-Stabilizing Transformations for Real-World Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10496--10506},
}
CompleteMe: Reference-based Human Image Completion: Yu-Ju Tsai,

Brian Price,

Qing Liu,

Luis Figueroa,

Daniil Pakhomov,

Zhihong Ding,

Scott Cohen,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Yu-Ju and Price, Brian and Liu, Qing and Figueroa, Luis and Pakhomov, Daniil and Ding, Zhihong and Cohen, Scott and Yang, Ming-Hsuan},
  title     = {{CompleteMe}: Reference-based Human Image Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18252--18261},
}
2HandedAfforder: Learning Precise Actionable Bimanual Affordances from Human Videos: Marvin Heidinger,

Snehal Jauhri,

Vignesh Prasad,

Georgia Chalvatzaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heidinger_2025_ICCV,
  author    = {Heidinger, Marvin and Jauhri, Snehal and Prasad, Vignesh and Chalvatzaki, Georgia},
  title     = {{2HandedAfforder}: Learning Precise Actionable Bimanual Affordances from Human Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14743--14753},
}
GenDoP: Auto-regressive Camera Trajectory Generation as a Director of Photography: Mengchen Zhang,

Tong Wu,

Jing Tan,

Ziwei Liu,

Gordon Wetzstein,

Dahua Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Mengchen and Wu, Tong and Tan, Jing and Liu, Ziwei and Wetzstein, Gordon and Lin, Dahua},
  title     = {{GenDoP}: Auto-regressive Camera Trajectory Generation as a Director of Photography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18229--18239},
}
Latent Swap Joint Diffusion for 2D Long-Form Latent Generation: Yusheng Dai,

Chenxi Wang,

Chang Li,

Chen Wang,

Kewei Li,

Jun Du,

Lei Sun,

Jianqing Gao,

Ruoyu Wang,

Jiefeng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Yusheng and Wang, Chenxi and Li, Chang and Wang, Chen and Li, Kewei and Du, Jun and Sun, Lei and Gao, Jianqing and Wang, Ruoyu and Ma, Jiefeng},
  title     = {Latent Swap Joint Diffusion for {2D} Long-Form Latent Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11006--11015},
}
From Image to Video: An Empirical Study of Diffusion Representations: Pedro Vélez,

Luisa F. Polanía,

Yi Yang,

Chuhan Zhang,

Rishabh Kabra,

Anurag Arnab,

Mehdi S. M. Sajjadi; [pdf] [supp]
[bibtex]
@InProceedings{Velez_2025_ICCV,
  author    = {V{\'e}lez, Pedro and Polan{\'\i}a, Luisa F. and Yang, Yi and Zhang, Chuhan and Kabra, Rishabh and Arnab, Anurag and Sajjadi, Mehdi S. M.},
  title     = {From Image to Video: An Empirical Study of Diffusion Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16948--16958},
}
FreeCus: Free Lunch Subject-driven Customization in Diffusion Transformers: Yanbing Zhang,

Zhe Wang,

Qin Zhou,

Mengping Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yanbing and Wang, Zhe and Zhou, Qin and Yang, Mengping},
  title     = {{FreeCus}: Free Lunch Subject-driven Customization in Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15521--15531},
}
Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition: Pulkit Kumar,

Shuaiyi Huang,

Matthew Walmer,

Sai Saketh Rambhatla,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Pulkit and Huang, Shuaiyi and Walmer, Matthew and Rambhatla, Sai Saketh and Shrivastava, Abhinav},
  title     = {Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13544--13556},
}
MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space: Lixing Xiao,

Shunlin Lu,

Huaijin Pi,

Ke Fan,

Liang Pan,

Yueer Zhou,

Ziyong Feng,

Xiaowei Zhou,

Sida Peng,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo},
  title     = {{MotionStreamer}: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10086--10096},
}
RS-vHeat: Heat Conduction Guided Efficient Remote Sensing Foundation Model: Huiyang Hu,

Peijin Wang,

Hanbo Bi,

Boyuan Tong,

Zhaozhi Wang,

Wenhui Diao,

Hao Chang,

Yingchao Feng,

Ziqi Zhang,

Yaowei Wang,

Qixiang Ye,

Kun Fu,

Xian Sun; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Huiyang and Wang, Peijin and Bi, Hanbo and Tong, Boyuan and Wang, Zhaozhi and Diao, Wenhui and Chang, Hao and Feng, Yingchao and Zhang, Ziqi and Wang, Yaowei and Ye, Qixiang and Fu, Kun and Sun, Xian},
  title     = {{RS-vHeat}: Heat Conduction Guided Efficient Remote Sensing Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9876--9887},
}
ArtEditor: Learning Customized Instructional Image Editor from Few-Shot Examples: Shijie Huang,

Yiren Song,

Yuxuan Zhang,

Hailong Guo,

Xueyin Wang,

Jiaming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Shijie and Song, Yiren and Zhang, Yuxuan and Guo, Hailong and Wang, Xueyin and Liu, Jiaming},
  title     = {{ArtEditor}: Learning Customized Instructional Image Editor from Few-Shot Examples},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17651--17662},
}
LiON-LoRA: Rethinking LoRA Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion: Yisu Zhang,

Chenjie Cao,

Chaohui Yu,

Jianke Zhu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yisu and Cao, Chenjie and Yu, Chaohui and Zhu, Jianke},
  title     = {{LiON-LoRA}: Rethinking {LoRA} Fusion to Unify Controllable Spatial and Temporal Generation for Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14569--14579},
}
VLIPP: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior: Xindi Yang,

Baolu Li,

Yiming Zhang,

Zhenfei Yin,

Lei Bai,

Liqian Ma,

Zhiyong Wang,

Jianfei Cai,

Tien-Tsin Wong,

Huchuan Lu,

Xu Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xindi and Li, Baolu and Zhang, Yiming and Yin, Zhenfei and Bai, Lei and Ma, Liqian and Wang, Zhiyong and Cai, Jianfei and Wong, Tien-Tsin and Lu, Huchuan and Jia, Xu},
  title     = {{VLIPP}: Towards Physically Plausible Video Generation with Vision and Language Informed Physical Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12360--12370},
}
DreamRelation: Relation-Centric Video Customization: Yujie Wei,

Shiwei Zhang,

Hangjie Yuan,

Biao Gong,

Longxiang Tang,

Xiang Wang,

Haonan Qiu,

Hengjia Li,

Shuai Tan,

Yingya Zhang,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yujie and Zhang, Shiwei and Yuan, Hangjie and Gong, Biao and Tang, Longxiang and Wang, Xiang and Qiu, Haonan and Li, Hengjia and Tan, Shuai and Zhang, Yingya and Shan, Hongming},
  title     = {{DreamRelation}: Relation-Centric Video Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12381--12393},
}
DropletVideo: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation: Runze Zhang,

Guoguang Du,

Xiaochuan Li,

Qi Jia,

Liang Jin,

Lu Liu,

Jingjing Wang,

Cong Xu,

Zhenhua Guo,

Yaqian Zhao,

Xiaoli Gong,

Rengang Li,

Baoyu Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Runze and Du, Guoguang and Li, Xiaochuan and Jia, Qi and Jin, Liang and Liu, Lu and Wang, Jingjing and Xu, Cong and Guo, Zhenhua and Zhao, Yaqian and Gong, Xiaoli and Li, Rengang and Fan, Baoyu},
  title     = {{DropletVideo}: A Dataset and Approach to Explore Integral Spatio-Temporal Consistent Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15583--15593},
}
Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints: Guanjie Chen,

Xinyu Zhao,

Yucheng Zhou,

Xiaoye Qu,

Tianlong Chen,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Guanjie and Zhao, Xinyu and Zhou, Yucheng and Qu, Xiaoye and Chen, Tianlong and Cheng, Yu},
  title     = {Towards Stabilized and Efficient Diffusion Transformers through Long-Skip-Connections with Spectral Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17708--17718},
}
FLOAT: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait: Taekyung Ki,

Dongchan Min,

Gyeongsu Chae; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ki_2025_ICCV,
  author    = {Ki, Taekyung and Min, Dongchan and Chae, Gyeongsu},
  title     = {{FLOAT}: Generative Motion Latent Flow Matching for Audio-driven Talking Portrait},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14699--14710},
}
Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection: Anja Delić,

Matej Grcic,

Siniša Šegvić; [pdf] [supp]
[bibtex]
@InProceedings{Delic_2025_ICCV,
  author    = {Deli{\'c}, Anja and Grcic, Matej and {\v{S}}egvi{\'c}, Sini{\v{s}}a},
  title     = {Sequential keypoint density estimator: an overlooked baseline of skeleton-based video anomaly detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11579--11589},
}
DLF: Extreme Image Compression with Dual-generative Latent Fusion: Naifu Xue,

Zhaoyang Jia,

Jiahao Li,

Bin Li,

Yuan Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zhang, Yuan and Lu, Yan},
  title     = {{DLF}: Extreme Image Compression with Dual-generative Latent Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19227--19236},
}
PanoLlama: Generating Endless and Coherent Panoramas with Next-Token-Prediction LLMs: Teng Zhou,

Xiaoyu Zhang,

Yongchuan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Teng and Zhang, Xiaoyu and Tang, Yongchuan},
  title     = {{PanoLlama}: Generating Endless and Coherent Panoramas with Next-Token-Prediction {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15340--15349},
}
VQ-SGen: A Vector Quantized Stroke Representation for Creative Sketch Generation: Jiawei Wang,

Zhiming Cui,

Changjian Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiawei and Cui, Zhiming and Li, Changjian},
  title     = {{VQ-SGen}: A Vector Quantized Stroke Representation for Creative Sketch Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19311--19320},
}
Spatial-Temporal Forgery Trace based Forgery Image Identification: Yilin Wang,

Zunlei Feng,

Jiachi Wang,

Hengrui Lou,

Binjia Zhou,

Jie Lei,

Mingli Song,

Yijun Bei; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yilin and Feng, Zunlei and Wang, Jiachi and Lou, Hengrui and Zhou, Binjia and Lei, Jie and Song, Mingli and Bei, Yijun},
  title     = {Spatial-Temporal Forgery Trace based Forgery Image Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17067--17076},
}
OneGT: One-Shot Geometry-Texture Neural Rendering for Head Avatars: Jinshu Chen,

Bingchuan Li,

Fan Zhang,

Songtao Zhao,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jinshu and Li, Bingchuan and Zhang, Fan and Zhao, Songtao and He, Qian},
  title     = {{OneGT}: One-Shot Geometry-Texture Neural Rendering for Head Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11294--11304},
}
A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness: Xiaoyi Feng,

Tao Huang,

Peng Wang,

Zizhou Huang,

Zhang Haihang,

Yuntao Zou,

Dagang Li,

Kaifeng Zou; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Xiaoyi and Huang, Tao and Wang, Peng and Huang, Zizhou and Haihang, Zhang and Zou, Yuntao and Li, Dagang and Zou, Kaifeng},
  title     = {A Unified Framework for Industrial Cel-Animation Colorization with Temporal-Structural Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19301--19310},
}
MCID: Multi-aspect Copyright Infringement Detection for Generated Images: Chuanwei Huang,

Zexi Jia,

Hongyan Fei,

Yeshuang Zhu,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Xiaoyue Duan,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Chuanwei and Jia, Zexi and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Duan, Xiaoyue and Zhang, Jinchao and Zhou, Jie},
  title     = {{MCID}: Multi-aspect Copyright Infringement Detection for Generated Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16154--16164},
}
Any2AnyTryon: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks: Hailong Guo,

Bohan Zeng,

Yiren Song,

Wentao Zhang,

Jiaming Liu,

Chuang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Hailong and Zeng, Bohan and Song, Yiren and Zhang, Wentao and Liu, Jiaming and Zhang, Chuang},
  title     = {{Any2AnyTryon}: Leveraging Adaptive Position Embeddings for Versatile Virtual Clothing Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19085--19096},
}
DISTA-Net: Dynamic Closely-Spaced Infrared Small Target Unmixing: Shengdong Han,

Shangdong Yang,

Yuxuan Li,

Xin Zhang,

Xiang Li,

Jian Yang,

Ming-Ming Cheng,

Yimian Dai; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Shengdong and Yang, Shangdong and Li, Yuxuan and Zhang, Xin and Li, Xiang and Yang, Jian and Cheng, Ming-Ming and Dai, Yimian},
  title     = {{DISTA-Net}: Dynamic Closely-Spaced Infrared Small Target Unmixing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14655--14664},
}
MDD: A Dataset for Text-and-Music Conditioned Duet Dance Generation: Prerit Gupta,

Jason Alexander Fotso-Puepi,

Zhengyuan Li,

Jay Mehta,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Prerit and Fotso-Puepi, Jason Alexander and Li, Zhengyuan and Mehta, Jay and Bera, Aniket},
  title     = {{MDD}: A Dataset for Text-and-Music Conditioned Duet Dance Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13932--13941},
}
SVG-Head: Hybrid Surface-Volumetric Gaussians for High-Fidelity Head Reconstruction and Real-Time Editing: Heyi Sun,

Cong Wang,

Tian-Xing Xu,

Jingwei Huang,

Di Kang,

Chunchao Guo,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Heyi and Wang, Cong and Xu, Tian-Xing and Huang, Jingwei and Kang, Di and Guo, Chunchao and Zhang, Song-Hai},
  title     = {{SVG-Head}: Hybrid Surface-Volumetric {Gaussians} for High-Fidelity Head Reconstruction and Real-Time Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13326--13335},
}
Video Motion Graphs: Haiyang Liu,

Zhan Xu,

Fa-Ting Hong,

Hsin-Ping Huang,

Yi Zhou,

Yang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Haiyang and Xu, Zhan and Hong, Fa-Ting and Huang, Hsin-Ping and Zhou, Yi and Zhou, Yang},
  title     = {Video Motion Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13730--13740},
}
Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering: Imad Eddine Marouf,

Enzo Tartaglione,

Stéphane Lathuilière,

Joost Van De Weijer; [pdf] [supp]
[bibtex]
@InProceedings{Marouf_2025_ICCV,
  author    = {Marouf, Imad Eddine and Tartaglione, Enzo and Lathuili{\`e}re, St{\'e}phane and Van De Weijer, Joost},
  title     = {Ask and Remember: A Questions-Only Replay Strategy for Continual Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18078--18089},
}
Dense Policy: Bidirectional Autoregressive Learning of Actions: Yue Su,

Xinyu Zhan,

Hongjie Fang,

Han Xue,

Hao-Shu Fang,

Yong-Lu Li,

Cewu Lu,

Lixin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Yue and Zhan, Xinyu and Fang, Hongjie and Xue, Han and Fang, Hao-Shu and Li, Yong-Lu and Lu, Cewu and Yang, Lixin},
  title     = {Dense Policy: Bidirectional Autoregressive Learning of Actions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14486--14495},
}
Conditional Visual Autoregressive Modeling for Pathological Image Restoration: Ziyi Liu,

Zhe Xu,

Jiabo Ma,

Wenqiang Li,

Ruixuan Wang,

Bo Du,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ziyi and Xu, Zhe and Ma, Jiabo and Li, Wenqiang and Wang, Ruixuan and Du, Bo and Chen, Hao},
  title     = {Conditional Visual Autoregressive Modeling for Pathological Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17828--17837},
}
BlueNeg: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration: Hanyuan Liu,

Chengze Li,

Minshan Xie,

Zhenni Wang,

Jiawen Liang,

Chi-Sing Leung,

Tien-Tsin Wong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Hanyuan and Li, Chengze and Xie, Minshan and Wang, Zhenni and Liang, Jiawen and Leung, Chi-Sing and Wong, Tien-Tsin},
  title     = {{BlueNeg}: A 35mm Negative Film Dataset for Restoring Channel-Heterogeneous Deterioration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13119--13128},
}
Gain-MLP: Improving HDR Gain Map Encoding via a Lightweight MLP: Trevor D. Canham,

SaiKiran Tedla,

Michael J. Murdoch,

Michael S. Brown; [pdf] [supp]
[bibtex]
@InProceedings{Canham_2025_ICCV,
  author    = {Canham, Trevor D. and Tedla, SaiKiran and Murdoch, Michael J. and Brown, Michael S.},
  title     = {{Gain-MLP}: Improving {HDR} Gain Map Encoding via a Lightweight {MLP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18619--18628},
}
StyleMotif: Multi-Modal Motion Stylization using Style-Content Cross Fusion: Ziyu Guo,

Young Yoon Lee,

Joseph Liu,

Yizhak Ben-Shabat,

Victor Zordan,

Mubbasir Kapadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ziyu and Lee, Young Yoon and Liu, Joseph and Ben-Shabat, Yizhak and Zordan, Victor and Kapadia, Mubbasir},
  title     = {{StyleMotif}: Multi-Modal Motion Stylization using Style-Content Cross Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13349--13359},
}
SpecGuard: Spectral Projection-based Advanced Invisible Watermarking: Inzamamul Alam,

Md Tanvir Islam,

Simon S. Woo,

Khan Muhammad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alam_2025_ICCV,
  author    = {Alam, Inzamamul and Islam, Md Tanvir and Woo, Simon S. and Muhammad, Khan},
  title     = {{SpecGuard}: Spectral Projection-based Advanced Invisible Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17984--17993},
}
Bridging Diffusion Models and 3D Representations: A 3D Consistent Super-Resolution Framework: Yi-Ting Chen,

Ting-Hsuan Liao,

Pengsheng Guo,

Alexander Schwing,

Jia-Bin Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yi-Ting and Liao, Ting-Hsuan and Guo, Pengsheng and Schwing, Alexander and Huang, Jia-Bin},
  title     = {Bridging Diffusion Models and {3D} Representations: A {3D} Consistent Super-Resolution Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13481--13490},
}
MagicID: Hybrid Preference Optimization for ID-Consistent and Dynamic-Preserved Video Customization: Hengjia Li,

Lifan Jiang,

Xi Xiao,

Tianyang Wang,

Hongwei Yi,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hengjia and Jiang, Lifan and Xiao, Xi and Wang, Tianyang and Yi, Hongwei and Wu, Boxi and Cai, Deng},
  title     = {{MagicID}: Hybrid Preference Optimization for {ID}-Consistent and Dynamic-Preserved Video Customization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12737--12746},
}
Motion-2-to-3: Leveraging 2D Motion Data for 3D Motion Generations: Ruoxi Guo,

Huaijin Pi,

Zehong Shen,

Qing Shuai,

Zechen Hu,

Zhumei Wang,

Yajiao Dong,

Ruizhen Hu,

Taku Komura,

Sida Peng,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ruoxi and Pi, Huaijin and Shen, Zehong and Shuai, Qing and Hu, Zechen and Wang, Zhumei and Dong, Yajiao and Hu, Ruizhen and Komura, Taku and Peng, Sida and Zhou, Xiaowei},
  title     = {Motion-2-to-3: Leveraging {2D} Motion Data for {3D} Motion Generations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14305--14316},
}
Advancing Text-to-3D Generation with Linearized Lookahead Variational Score Distillation: Yu Lei,

Bingde Liu,

Qingsong Xie,

Haonan Lu,

Zhijie Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Yu and Liu, Bingde and Xie, Qingsong and Lu, Haonan and Deng, Zhijie},
  title     = {Advancing Text-to-{3D} Generation with Linearized Lookahead Variational Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19567--19576},
}
Vivid4D: Improving 4D Reconstruction from Monocular Video by Video Inpainting: Jiaxin Huang,

Sheng Miao,

Bangbang Yang,

Yuewen Ma,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Jiaxin and Miao, Sheng and Yang, Bangbang and Ma, Yuewen and Liao, Yiyi},
  title     = {{Vivid4D}: Improving {4D} Reconstruction from Monocular Video by Video Inpainting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12592--12604},
}
RePoseD: Efficient Relative Pose Estimation With Known Depth Information: Yaqing Ding,

Viktor Kocur,

Václav Vávra,

Zuzana Berger Haladová,

Jian Yang,

Torsten Sattler,

Zuzana Kukelova; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Yaqing and Kocur, Viktor and V{\'a}vra, V{\'a}clav and Haladov{\'a}, Zuzana Berger and Yang, Jian and Sattler, Torsten and Kukelova, Zuzana},
  title     = {{RePoseD}: Efficient Relative Pose Estimation With Known Depth Information},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14876--14886},
}
Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition: Zefeng Qian,

Xincheng Yao,

Yifei Huang,

Chongyang Zhang,

Jiangyong Ying,

Hong Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zefeng and Yao, Xincheng and Huang, Yifei and Zhang, Chongyang and Ying, Jiangyong and Sun, Hong},
  title     = {Beyond Label Semantics: Language-Guided Action Anatomy for Few-shot Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10421--10431},
}
VoluMe - Authentic 3D Video Calls from Live Gaussian Splat Prediction: Martin de La Gorce,

Charlie Hewitt,

Tibor Takács,

Robert Gerdisch,

Zafiirah Hosenie,

Givi Meishvili,

Marek Kowalski,

Thomas J. Cashman,

Antonio Criminisi; [pdf] [supp]
[bibtex]
@InProceedings{de_La_Gorce_2025_ICCV,
  author    = {de La Gorce, Martin and Hewitt, Charlie and Tak{\'a}cs, Tibor and Gerdisch, Robert and Hosenie, Zafiirah and Meishvili, Givi and Kowalski, Marek and Cashman, Thomas J. and Criminisi, Antonio},
  title     = {{VoluMe} - Authentic {3D} Video Calls from Live {Gaussian} Splat Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13783--13792},
}
ADIEE: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation: Sherry X. Chen,

Yi Wei,

Luowei Zhou,

Suren Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Sherry X. and Wei, Yi and Zhou, Luowei and Kumar, Suren},
  title     = {{ADIEE}: Automatic Dataset Creation and Scorer for Instruction-Guided Image Editing Evaluation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18345--18356},
}
CSD-VAR: Content-Style Decomposition in Visual Autoregressive Models: Quang-Binh Nguyen,

Minh Luu,

Quang Nguyen,

Anh Tran,

Khoi Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Quang-Binh and Luu, Minh and Nguyen, Quang and Tran, Anh and Nguyen, Khoi},
  title     = {{CSD-VAR}: Content-Style Decomposition in Visual Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17013--17023},
}
Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy: Wei Junhao,

Yu Zhe,

Jun Sakuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Junhao_2025_ICCV,
  author    = {Junhao, Wei and Zhe, Yu and Sakuma, Jun},
  title     = {Disrupting Model Merging: A Parameter-Level Defense Without Sacrificing Accuracy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17698--17707},
}
VSSD: Vision Mamba with Non-Causal State Space Duality: Yuheng Shi,

Mingjia Li,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yuheng and Li, Mingjia and Dong, Minjing and Xu, Chang},
  title     = {{VSSD}: Vision {Mamba} with Non-Causal State Space Duality},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10819--10829},
}
Region-Level Data Attribution for Text-to-Image Generative Models: Trong Bang Nguyen,

Phi Le Nguyen,

Simon Lucey,

Minh Hoai; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Trong Bang and Le Nguyen, Phi and Lucey, Simon and Hoai, Minh},
  title     = {Region-Level Data Attribution for Text-to-Image Generative Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18825--18833},
}
Diffuman4D: 4D Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models: Yudong Jin,

Sida Peng,

Xuan Wang,

Tao Xie,

Zhen Xu,

Yifan Yang,

Yujun Shen,

Hujun Bao,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Yudong and Peng, Sida and Wang, Xuan and Xie, Tao and Xu, Zhen and Yang, Yifan and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei},
  title     = {{Diffuman4D}: {4D} Consistent Human View Synthesis from Sparse-View Videos with Spatio-Temporal Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11047--11057},
}
Generic Event Boundary Detection via Denoising Diffusion: Jaejun Hwang,

Dayoung Gong,

Manjin Kim,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Jaejun and Gong, Dayoung and Kim, Manjin and Cho, Minsu},
  title     = {Generic Event Boundary Detection via Denoising Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14084--14094},
}
ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization: Yuanhe Guo,

Linxi Xie,

Zhuoran Chen,

Kangrui Yu,

Ryan Po,

Guandao Yang,

Gordon Wetzstein,

Hongyi Wen; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yuanhe and Xie, Linxi and Chen, Zhuoran and Yu, Kangrui and Po, Ryan and Yang, Guandao and Wetzstein, Gordon and Wen, Hongyi},
  title     = {{ImageGem}: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19577--19586},
}
Context Guided Transformer Entropy Modeling for Video Compression: Junlong Tong,

Wei Zhang,

Yaohui Jin,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Junlong and Zhang, Wei and Jin, Yaohui and Shen, Xiaoyu},
  title     = {Context Guided Transformer Entropy Modeling for Video Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18885--18894},
}
PrimHOI: Compositional Human-Object Interaction via Reusable Primitives: Kai Jia,

Tengyu Liu,

Mingtao Pei,

Yixin Zhu,

Siyuan Huang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Kai and Liu, Tengyu and Pei, Mingtao and Zhu, Yixin and Huang, Siyuan},
  title     = {{PrimHOI}: Compositional Human-Object Interaction via Reusable Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11491--11501},
}
Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation: Yuxuan Wang,

Xuanyu Yi,

Haohan Weng,

Qingshan Xu,

Xiaokang Wei,

Xianghui Yang,

Chunchao Guo,

Long Chen,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Yi, Xuanyu and Weng, Haohan and Xu, Qingshan and Wei, Xiaokang and Yang, Xianghui and Guo, Chunchao and Chen, Long and Zhang, Hanwang},
  title     = {Nautilus: Locality-aware Autoencoder for Scalable Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10961--10970},
}
What Changed and What Could Have Changed? State-Change Counterfactuals for Procedure-Aware Video Representation Learning: Chi-Hsi Kung,

Frangil Ramirez,

Juhyung Ha,

Yi-Ting Chen,

David Crandall,

Yi-Hsuan Tsai; [pdf] [supp]
[bibtex]
@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Chi-Hsi and Ramirez, Frangil and Ha, Juhyung and Chen, Yi-Ting and Crandall, David and Tsai, Yi-Hsuan},
  title     = {What Changed and What Could Have Changed? {State}-Change Counterfactuals for Procedure-Aware Video Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12294--12306},
}
X-Dancer: Expressive Music to Human Dance Video Generation: Zeyuan Chen,

Hongyi Xu,

Guoxian Song,

You Xie,

Chenxu Zhang,

Xin Chen,

Chao Wang,

Di Chang,

Linjie Luo; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zeyuan and Xu, Hongyi and Song, Guoxian and Xie, You and Zhang, Chenxu and Chen, Xin and Wang, Chao and Chang, Di and Luo, Linjie},
  title     = {{X-Dancer}: Expressive Music to Human Dance Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10602--10611},
}
IDF: Iterative Dynamic Filtering Networks for Generalizable Image Denoising: Dongjin Kim,

Jaekyun Ko,

Muhammad Kashif Ali,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongjin and Ko, Jaekyun and Ali, Muhammad Kashif and Kim, Tae Hyun},
  title     = {{IDF}: Iterative Dynamic Filtering Networks for Generalizable Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12180--12190},
}
LayerLock: Non-collapsing Representation Learning with Progressive Freezing: Goker Erdogan,

Nikhil Parthasarathy,

Catalin Ionescu,

Drew A. Hudson,

Alexander Lerchner,

Andrew Zisserman,

Mehdi S. M. Sajjadi,

Joao Carreira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Erdogan_2025_ICCV,
  author    = {Erdogan, Goker and Parthasarathy, Nikhil and Ionescu, Catalin and Hudson, Drew A. and Lerchner, Alexander and Zisserman, Andrew and Sajjadi, Mehdi S. M. and Carreira, Joao},
  title     = {{LayerLock}: Non-collapsing Representation Learning with Progressive Freezing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19461--19470},
}
Derm1M: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology: Siyuan Yan,

Ming Hu,

Yiwen Jiang,

Xieji Li,

Hao Fei,

Philipp Tschandl,

Harald Kittler,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Siyuan and Hu, Ming and Jiang, Yiwen and Li, Xieji and Fei, Hao and Tschandl, Philipp and Kittler, Harald and Ge, Zongyuan},
  title     = {{Derm1M}: A Million-scale Vision-Language Dataset Aligned with Clinical Ontology Knowledge for Dermatology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12681--12690},
}
Expressive Talking Human from Single-Image with Imperfect Priors: Jun Xiang,

Yudong Guo,

Leipeng Hu,

Boyang Guo,

Yancheng Yuan,

Juyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Jun and Guo, Yudong and Hu, Leipeng and Guo, Boyang and Yuan, Yancheng and Zhang, Juyong},
  title     = {Expressive Talking Human from Single-Image with Imperfect Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10398--10409},
}
DADM: Dual Alignment of Domain and Modality for Face Anti-spoofing: Jingyi Yang,

Xun Lin,

Zitong Yu,

Liepiao Zhang,

Xin Liu,

Hui Li,

Xiaochen Yuan,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jingyi and Lin, Xun and Yu, Zitong and Zhang, Liepiao and Liu, Xin and Li, Hui and Yuan, Xiaochen and Cao, Xiaochun},
  title     = {{DADM}: Dual Alignment of Domain and Modality for Face Anti-spoofing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12045--12056},
}
IntroStyle: Training-Free Introspective Style Attribution using Diffusion Features: Anand Kumar,

Jiteng Mu,

Nuno Vasconcelos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Anand and Mu, Jiteng and Vasconcelos, Nuno},
  title     = {{IntroStyle}: Training-Free Introspective Style Attribution using Diffusion Features},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14909--14918},
}
Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model: Chengxu Liu,

Lu Qi,

Jinshan Pan,

Xueming Qian,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chengxu and Qi, Lu and Pan, Jinshan and Qian, Xueming and Yang, Ming-Hsuan},
  title     = {Learning Deblurring Texture Prior from Unpaired Data with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14195--14204},
}
Multi-modal Multi-platform Person Re-Identification: Benchmark and Method: Ruiyang Ha,

Songyi Jiang,

Bin Li,

Bikang Pan,

Yihang Zhu,

Junjie Zhang,

Xiatian Zhu,

Shaogang Gong,

Jingya Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Ruiyang and Jiang, Songyi and Li, Bin and Pan, Bikang and Zhu, Yihang and Zhang, Junjie and Zhu, Xiatian and Gong, Shaogang and Wang, Jingya},
  title     = {Multi-modal Multi-platform Person Re-Identification: Benchmark and Method},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10251--10261},
}
CAP: Evaluation of Persuasive and Creative Image Generation: Aysan Aghazadeh,

Adriana Kovashka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aghazadeh_2025_ICCV,
  author    = {Aghazadeh, Aysan and Kovashka, Adriana},
  title     = {{CAP}: Evaluation of Persuasive and Creative Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16970--16980},
}
Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in fMRI-to-Video Reconstruction: Haonan Wang,

Qixiang Zhang,

Lehan Wang,

Xuanqi Huang,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haonan and Zhang, Qixiang and Wang, Lehan and Huang, Xuanqi and Li, Xiaomeng},
  title     = {Neurons: Emulating the Human Visual Cortex Improves Fidelity and Interpretability in {fMRI}-to-Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18367--18376},
}
LACONIC: A 3D Layout Adapter for Controllable Image Creation: Léopold Maillard,

Tom Durand,

Adrien Ramanana Rahary,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maillard_2025_ICCV,
  author    = {Maillard, L{\'e}opold and Durand, Tom and Rahary, Adrien Ramanana and Ovsjanikov, Maks},
  title     = {{LACONIC}: A {3D} Layout Adapter for Controllable Image Creation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18046--18057},
}
InfiniDreamer: Arbitrarily Long Human Motion Generation via Segment Score Distillation: Wenjie Zhuo,

Fan Ma,

Hehe Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Wenjie and Ma, Fan and Fan, Hehe},
  title     = {{InfiniDreamer}: Arbitrarily Long Human Motion Generation via Segment Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14688--14698},
}
FairGen: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions: Yilei Jiang,

Wei-Hong Li,

Yiyuan Zhang,

Minghong Cai,

Xiangyu Yue; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yilei and Li, Wei-Hong and Zhang, Yiyuan and Cai, Minghong and Yue, Xiangyu},
  title     = {{FairGen}: Enhancing Fairness in Text-to-Image Diffusion Models via Self-Discovering Latent Directions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18411--18420},
}
Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation: Yuqing Wang,

Zhijie Lin,

Yao Teng,

Yuanzhi Zhu,

Shuhuai Ren,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuqing and Lin, Zhijie and Teng, Yao and Zhu, Yuanzhi and Ren, Shuhuai and Feng, Jiashi and Liu, Xihui},
  title     = {Bridging Continuous and Discrete Tokens for Autoregressive Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18596--18605},
}
Trade-offs in Image Generation: How Do Different Dimensions Interact?: Sicheng Zhang,

Binzhu Xie,

Zhonghao Yan,

Yuli Zhang,

Donghao Zhou,

Xiaofei Chen,

Shi Qiu,

Jiaqi Liu,

Guoyang Xie,

Zhichao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Sicheng and Xie, Binzhu and Yan, Zhonghao and Zhang, Yuli and Zhou, Donghao and Chen, Xiaofei and Qiu, Shi and Liu, Jiaqi and Xie, Guoyang and Lu, Zhichao},
  title     = {Trade-offs in Image Generation: How Do Different Dimensions Interact?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17256--17267},
}
Rethinking Layered Graphic Design Generation with a Top-Down Approach: Jingye Chen,

Zhaowen Wang,

Nanxuan Zhao,

Li Zhang,

Difan Liu,

Jimei Yang,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jingye and Wang, Zhaowen and Zhao, Nanxuan and Zhang, Li and Liu, Difan and Yang, Jimei and Chen, Qifeng},
  title     = {Rethinking Layered Graphic Design Generation with a Top-Down Approach},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16861--16870},
}
The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation: Aoxiong Yin,

Xu Tan,

Kai Shen,

Yichong Leng,

Xinyu Zhou,

Juncheng Li,

Siliang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Aoxiong and Tan, Xu and Shen, Kai and Leng, Yichong and Zhou, Xinyu and Li, Juncheng and Tang, Siliang},
  title     = {The Best of Both Worlds: Integrating Language Models and Diffusion Models for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15604--15615},
}
SV4D 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality 4D Generation: Chun-Han Yao,

Yiming Xie,

Vikram Voleti,

Huaizu Jiang,

Varun Jampani; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Chun-Han and Xie, Yiming and Voleti, Vikram and Jiang, Huaizu and Jampani, Varun},
  title     = {{SV4D} 2.0: Enhancing Spatio-Temporal Consistency in Multi-View Video Diffusion for High-Quality {4D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13248--13258},
}
Long Context Tuning for Video Generation: Yuwei Guo,

Ceyuan Yang,

Ziyan Yang,

Zhibei Ma,

Zhijie Lin,

Zhenheng Yang,

Dahua Lin,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Yuwei and Yang, Ceyuan and Yang, Ziyan and Ma, Zhibei and Lin, Zhijie and Yang, Zhenheng and Lin, Dahua and Jiang, Lu},
  title     = {Long Context Tuning for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17281--17291},
}
TCFG: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration: Xiaomeng Fu,

Jia Li; [pdf]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Xiaomeng and Li, Jia},
  title     = {{TCFG}: Truncated Classifier-Free Guidance for Efficient and Scalable Text-to-Image Acceleration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18552--18562},
}
Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation: Uzay Gökay,

Federico Spurio,

Dominik R. Bach,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Gokay_2025_ICCV,
  author    = {G{\"o}kay, Uzay and Spurio, Federico and Bach, Dominik R. and Gall, Juergen},
  title     = {Skeleton Motion Words for Unsupervised Skeleton-Based Temporal Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12101--12111},
}
Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification: Wenkui Yang,

Jie Cao,

Junxian Duan,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wenkui and Cao, Jie and Duan, Junxian and He, Ran},
  title     = {Towards Robust Defense against Customization via Protective Perturbation Resistant to Diffusion-based Purification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19290--19300},
}
Calibrating MLLM-as-a-judge via Multimodal Bayesian Prompt Ensembles: Eric Slyman,

Mehrab Tanjim,

Kushal Kafle,

Stefan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Slyman_2025_ICCV,
  author    = {Slyman, Eric and Tanjim, Mehrab and Kafle, Kushal and Lee, Stefan},
  title     = {Calibrating {MLLM}-as-a-judge via Multimodal {Bayesian} Prompt Ensembles},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17224--17234},
}
SCFlow: Implicitly Learning Style and Content Disentanglement with Flow Models: Pingchuan Ma,

Xiaopei Yang,

Yusong Li,

Ming Gui,

Felix Krause,

Johannes Schusterbauer,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Pingchuan and Yang, Xiaopei and Li, Yusong and Gui, Ming and Krause, Felix and Schusterbauer, Johannes and Ommer, Bj{\"o}rn},
  title     = {{SCFlow}: Implicitly Learning Style and Content Disentanglement with Flow Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14919--14929},
}
Lay-Your-Scene: Natural Scene Layout Generation with Diffusion Transformers: Divyansh Srivastava,

Xiang Zhang,

He Wen,

Chenru Wen,

Zhuowen Tu; [pdf] [supp]
[bibtex]
@InProceedings{Srivastava_2025_ICCV,
  author    = {Srivastava, Divyansh and Zhang, Xiang and Wen, He and Wen, Chenru and Tu, Zhuowen},
  title     = {{Lay-Your-Scene}: Natural Scene Layout Generation with Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17909--17919},
}
Multi-scenario Overlapping Text Segmentation with Depth Awareness: Yang Liu,

Xudong Xie,

Yuliang Liu,

Xiang Bai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Xie, Xudong and Liu, Yuliang and Bai, Xiang},
  title     = {Multi-scenario Overlapping Text Segmentation with Depth Awareness},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17454--17463},
}
VisualCloze: A Universal Image Generation Framework via Visual In-Context Learning: Zhong-Yu Li,

Ruoyi Du,

Juncheng Yan,

Le Zhuo,

Zhen Li,

Peng Gao,

Zhanyu Ma,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhong-Yu and Du, Ruoyi and Yan, Juncheng and Zhuo, Le and Li, Zhen and Gao, Peng and Ma, Zhanyu and Cheng, Ming-Ming},
  title     = {{VisualCloze}: A Universal Image Generation Framework via Visual In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18969--18979},
}
Multi-Object Sketch Animation by Scene Decomposition and Motion Planning: Jingyu Liu,

Zijie Xin,

Yuhan Fu,

Ruixiang Zhao,

Bangxiang Lan,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jingyu and Xin, Zijie and Fu, Yuhan and Zhao, Ruixiang and Lan, Bangxiang and Li, Xirong},
  title     = {Multi-Object Sketch Animation by Scene Decomposition and Motion Planning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11537--11546},
}
MistSense: Versatile Online Detection of Procedural and Execution Mistakes: Constantin Patsch,

Yuankai Wu,

Marsil Zakour,

Driton Salihu,

Eckehard Steinbach; [pdf]
[bibtex]
@InProceedings{Patsch_2025_ICCV,
  author    = {Patsch, Constantin and Wu, Yuankai and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard},
  title     = {{MistSense}: Versatile Online Detection of Procedural and Execution Mistakes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14528--14537},
}
Dirichlet-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration: Baoyou Chen,

Ce Liu,

Weihao Yuan,

Zilong Dong,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Baoyou and Liu, Ce and Yuan, Weihao and Dong, Zilong and Zhu, Siyu},
  title     = {Dirichlet-Constrained Variational Codebook Learning for Temporally Coherent Video Face Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14507--14516},
}
Dual-Expert Consistency Model for Efficient and High-Quality Video Generation: Zhengyao Lv,

Chenyang Si,

Tianlin Pan,

Zhaoxi Chen,

Kwan-Yee K. Wong,

Yu Qiao,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_ICCV,
  author    = {Lv, Zhengyao and Si, Chenyang and Pan, Tianlin and Chen, Zhaoxi and Wong, Kwan-Yee K. and Qiao, Yu and Liu, Ziwei},
  title     = {Dual-Expert Consistency Model for Efficient and High-Quality Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14983--14993},
}
Dense2MoE: Restructuring Diffusion Transformer to MoE for Efficient Text-to-Image Generation: Youwei Zheng,

Yuxi Ren,

Xin Xia,

Xuefeng Xiao,

Xiaohua Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Youwei and Ren, Yuxi and Xia, Xin and Xiao, Xuefeng and Xie, Xiaohua},
  title     = {{Dense2MoE}: Restructuring Diffusion Transformer to {MoE} for Efficient Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18661--18670},
}
KinMo: Kinematic-aware Human Motion Understanding and Generation: Pengfei Zhang,

Pinxin Liu,

Pablo Garrido,

Hyeongwoo Kim,

Bindita Chaudhuri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Pengfei and Liu, Pinxin and Garrido, Pablo and Kim, Hyeongwoo and Chaudhuri, Bindita},
  title     = {{KinMo}: Kinematic-aware Human Motion Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11187--11197},
}
WikiAutoGen: Towards Multi-Modal Wikipedia-Style Article Generation: Zhongyu Yang,

Jun Chen,

Dannong Xu,

Junjie Fei,

Xiaoqian Shen,

Liangbing Zhao,

Chun-Mei Feng,

Mohamed Elhoseiny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhongyu and Chen, Jun and Xu, Dannong and Fei, Junjie and Shen, Xiaoqian and Zhao, Liangbing and Feng, Chun-Mei and Elhoseiny, Mohamed},
  title     = {{WikiAutoGen}: Towards Multi-Modal {Wikipedia}-Style Article Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15532--15541},
}
GMMamba: Group Masking Mamba for Whole Slide Image Classification: Tingting Zheng,

Hongxun Yao,

Kui Jiang,

Yi Xiao,

Sicheng Zhao; [pdf]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Tingting and Yao, Hongxun and Jiang, Kui and Xiao, Yi and Zhao, Sicheng},
  title     = {{GMMamba}: Group Masking {Mamba} for Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9935--9944},
}
OCK: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics: Yeon-Ji Song,

Jaein Kim,

Suhyung Choi,

Jin-Hwa Kim,

Byoung-Tak Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yeon-Ji and Kim, Jaein and Choi, Suhyung and Kim, Jin-Hwa and Zhang, Byoung-Tak},
  title     = {{OCK}: Unsupervised Dynamic Video Prediction with Object-Centric Kinematics},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11359--11368},
}
Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning: Saemi Moon,

Minjong Lee,

Sangdon Park,

Dongwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, Saemi and Lee, Minjong and Park, Sangdon and Kim, Dongwoo},
  title     = {Holistic Unlearning Benchmark: A Multi-Faceted Evaluation for Text-to-Image Diffusion Model Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16356--16366},
}
Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer: Yuansheng Li,

Yunhao Zou,

Linwei Chen,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuansheng and Zou, Yunhao and Chen, Linwei and Fu, Ying},
  title     = {Physical Degradation Model-Guided Interferometric Hyperspectral Reconstruction with Unfolding Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13815--13825},
}
OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models: Gaojie Lin,

Jianwen Jiang,

Jiaqi Yang,

Zerong Zheng,

Chao Liang,

Yuan Zhang,

Jingtuo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Gaojie and Jiang, Jianwen and Yang, Jiaqi and Zheng, Zerong and Liang, Chao and Zhang, Yuan and Liu, Jingtuo},
  title     = {{OmniHuman-1}: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13847--13858},
}
"Principal Components" Enable A New Language of Images: Xin Wen,

Bingchen Zhao,

Ismail Elezi,

Jiankang Deng,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Xin and Zhao, Bingchen and Elezi, Ismail and Deng, Jiankang and Qi, Xiaojuan},
  title     = {{``Principal Components''} Enable A New Language of Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16641--16651},
}
Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention: Jeonghoon Park,

Juyoung Lee,

Chaeyeon Chung,

Jaeseong Lee,

Jaegul Choo,

Jindong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jeonghoon and Lee, Juyoung and Chung, Chaeyeon and Lee, Jaeseong and Choo, Jaegul and Gu, Jindong},
  title     = {Fair Generation without Unfair Distortions: Debiasing Text-to-Image Generation with Entanglement-Free Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17567--17576},
}
Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion: Byeonghun Lee,

Hyunmin Cho,

Hong Gyu Choi,

Soo Min Kang,

Iljun Ahn,

Kyong Hwan Jin; [pdf]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Byeonghun and Cho, Hyunmin and Choi, Hong Gyu and Kang, Soo Min and Ahn, Iljun and Jin, Kyong Hwan},
  title     = {Reference-based Super-Resolution via Image-based Retrieval-Augmented Generation Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10764--10774},
}
FakeRadar: Probing Forgery Outliers to Detect Unknown Deepfake Videos: Zhaolun Li,

Jichang Li,

Yinqi Cai,

Junye Chen,

Xiaonan Luo,

Guanbin Li,

Rushi Lan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhaolun and Li, Jichang and Cai, Yinqi and Chen, Junye and Luo, Xiaonan and Li, Guanbin and Lan, Rushi},
  title     = {{FakeRadar}: Probing Forgery Outliers to Detect Unknown Deepfake Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13382--13392},
}
SynTag: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking: Han Fang,

Kejiang Chen,

Zehua Ma,

Jiajun Deng,

Yicong Li,

Weiming Zhang,

Ee-Chien Chang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Han and Chen, Kejiang and Ma, Zehua and Deng, Jiajun and Li, Yicong and Zhang, Weiming and Chang, Ee-Chien},
  title     = {{SynTag}: Enhancing the Geometric Robustness of Inversion-based Generative Image Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15416--15425},
}
DIMCIM: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models: Revant Teotia,

Candace Ross,

Karen Ullrich,

Sumit Chopra,

Adriana Romero-Soriano,

Melissa Hall,

Matthew Muckley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teotia_2025_ICCV,
  author    = {Teotia, Revant and Ross, Candace and Ullrich, Karen and Chopra, Sumit and Romero-Soriano, Adriana and Hall, Melissa and Muckley, Matthew},
  title     = {{DIMCIM}: A Quantitative Evaluation Framework for Default-mode Diversity and Generalization in Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16431--16440},
}
ForCenNet: Foreground-Centric Network for Document Image Rectification: Peng Cai,

Qiang Li,

Kaicheng Yang,

Dong Guo,

Jia Li,

Nan Zhou,

Xiang An,

Ninghua Yang,

Jiankang Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Peng and Li, Qiang and Yang, Kaicheng and Guo, Dong and Li, Jia and Zhou, Nan and An, Xiang and Yang, Ninghua and Deng, Jiankang},
  title     = {{ForCenNet}: Foreground-Centric Network for Document Image Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15137--15146},
}
Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent: En Ci,

Shanyan Guan,

Yanhao Ge,

Yilin Zhang,

Wei Li,

Zhenyu Zhang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Ci_2025_ICCV,
  author    = {Ci, En and Guan, Shanyan and Ge, Yanhao and Zhang, Yilin and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying},
  title     = {Describe, Don't Dictate: Semantic Image Editing with Natural Language Intent},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19185--19194},
}
LiT: Delving into a Simple Linear Diffusion Transformer for Image Generation: Jiahao Wang,

Ning Kang,

Lewei Yao,

Mengzhao Chen,

Chengyue Wu,

Songyang Zhang,

Shuchen Xue,

Yong Liu,

Taiqiang Wu,

Xihui Liu,

Kaipeng Zhang,

Shifeng Zhang,

Wenqi Shao,

Zhenguo Li,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiahao and Kang, Ning and Yao, Lewei and Chen, Mengzhao and Wu, Chengyue and Zhang, Songyang and Xue, Shuchen and Liu, Yong and Wu, Taiqiang and Liu, Xihui and Zhang, Kaipeng and Zhang, Shifeng and Shao, Wenqi and Li, Zhenguo and Luo, Ping},
  title     = {{LiT}: Delving into a Simple Linear Diffusion Transformer for Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16068--16078},
}
ForgeLens: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection: Yingjian Chen,

Lei Zhang,

Yakun Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingjian and Zhang, Lei and Niu, Yakun},
  title     = {{ForgeLens}: Data-Efficient Forgery Focus for Generalizable Forgery Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16270--16280},
}
NoiseController: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration: Haotian Dong,

Xin Wang,

Di Lin,

Yipeng Wu,

Qin Chen,

Ruonan Liu,

Kairui Yang,

Ping Li,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Haotian and Wang, Xin and Lin, Di and Wu, Yipeng and Chen, Qin and Liu, Ruonan and Yang, Kairui and Li, Ping and Guo, Qing},
  title     = {{NoiseController}: Towards Consistent Multi-view Video Generation via Noise Decomposition and Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14443--14452},
}
MOERL: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration: Tao Wang,

Peiwen Xia,

Bo Li,

Peng-Tao Jiang,

Zhe Kong,

Kaihao Zhang,

Tong Lu,

Wenhan Luo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Xia, Peiwen and Li, Bo and Jiang, Peng-Tao and Kong, Zhe and Zhang, Kaihao and Lu, Tong and Luo, Wenhan},
  title     = {{MOERL}: When Mixture-of-Experts Meet Reinforcement Learning for Adverse Weather Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13673--13683},
}
DACoN: DINO for Anime Paint Bucket Colorization with Any Number of Reference Images: Kazuma Nagata,

Naoshi Kaneko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagata_2025_ICCV,
  author    = {Nagata, Kazuma and Kaneko, Naoshi},
  title     = {{DACoN}: {DINO} for Anime Paint Bucket Colorization with Any Number of Reference Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17899--17908},
}
Multi-identity Human Image Animation with Structural Video Diffusion: Zhenzhi Wang,

Yixuan Li,

Yanhong Zeng,

Yuwei Guo,

Dahua Lin,

Tianfan Xue,

Bo Dai; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhenzhi and Li, Yixuan and Zeng, Yanhong and Guo, Yuwei and Lin, Dahua and Xue, Tianfan and Dai, Bo},
  title     = {Multi-identity Human Image Animation with Structural Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11937--11947},
}
UniVG: A Generalist Diffusion Model for Unified Image Generation and Editing: Tsu-Jui Fu,

Yusu Qian,

Chen Chen,

Wenze Hu,

Zhe Gan,

Yinfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tsu-Jui and Qian, Yusu and Chen, Chen and Hu, Wenze and Gan, Zhe and Yang, Yinfei},
  title     = {{UniVG}: A Generalist Diffusion Model for Unified Image Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17160--17170},
}
CLOT: Closed Loop Optimal Transport for Unsupervised Action Segmentation: Elena Bueno-Benito,

Mariella Dimiccoli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bueno-Benito_2025_ICCV,
  author    = {Bueno-Benito, Elena and Dimiccoli, Mariella},
  title     = {{CLOT}: Closed Loop Optimal Transport for Unsupervised Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10719--10729},
}
Accelerating Diffusion Sampling via Exploiting Local Transition Coherence: Shangwen Zhu,

Han Zhang,

Zhantao Yang,

Qianyu Peng,

Zhao Pu,

Huangji Wang,

Fan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Shangwen and Zhang, Han and Yang, Zhantao and Peng, Qianyu and Pu, Zhao and Wang, Huangji and Cheng, Fan},
  title     = {Accelerating Diffusion Sampling via Exploiting Local Transition Coherence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18284--18293},
}
What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models: Lorenzo Baraldi,

Davide Bucciarelli,

Federico Betti,

Marcella Cornia,

Lorenzo Baraldi,

Nicu Sebe,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baraldi_2025_ICCV,
  author    = {Baraldi, Lorenzo and Bucciarelli, Davide and Betti, Federico and Cornia, Marcella and Baraldi, Lorenzo and Sebe, Nicu and Cucchiara, Rita},
  title     = {What Changed? Detecting and Evaluating Instruction-Guided Image Edits with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16217--16226},
}
Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control: Seongmin Park,

Hyungmin Kim,

Sangwoo Kim,

Wonseok Jeon,

Juyoung Yang,

Byeongwook Jeon,

Yoonseon Oh,

Jungwook Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Seongmin and Kim, Hyungmin and Kim, Sangwoo and Jeon, Wonseok and Yang, Juyoung and Jeon, Byeongwook and Oh, Yoonseon and Choi, Jungwook},
  title     = {Saliency-Aware Quantized Imitation Learning for Efficient Robotic Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13140--13150},
}
GTR: Guided Thought Reinforcement Prevents Thought Collapse in RL-based VLM Agent Training: Tong Wei,

Yijun Yang,

Junliang Xing,

Yuanchun Shi,

Zongqing Lu,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Tong and Yang, Yijun and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng},
  title     = {{GTR}: Guided Thought Reinforcement Prevents Thought Collapse in {RL}-based {VLM} Agent Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18855--18865},
}
Semantic Discrepancy-aware Detector for Image Forgery Identification: Ziye Wang,

Minghang Yu,

Chunyan Xu,

Zhen Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziye and Yu, Minghang and Xu, Chunyan and Cui, Zhen},
  title     = {Semantic Discrepancy-aware Detector for Image Forgery Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18388--18398},
}
JPEG Processing Neural Operator for Backward-Compatible Coding: Woo Kyoung Han,

Yongjun Lee,

Byeonghun Lee,

Sang Hyun Park,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Woo Kyoung and Lee, Yongjun and Lee, Byeonghun and Park, Sang Hyun and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {{JPEG} Processing Neural Operator for Backward-Compatible Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19503--19512},
}
DPoser-X: Diffusion Model as Robust 3D Whole-body Human Pose Prior: Junzhe Lu,

Jing Lin,

Hongkun Dou,

Ailing Zeng,

Yue Deng,

Xian Liu,

Zhongang Cai,

Lei Yang,

Yulun Zhang,

Haoqian Wang,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Junzhe and Lin, Jing and Dou, Hongkun and Zeng, Ailing and Deng, Yue and Liu, Xian and Cai, Zhongang and Yang, Lei and Zhang, Yulun and Wang, Haoqian and Liu, Ziwei},
  title     = {{DPoser-X}: Diffusion Model as Robust {3D} Whole-body Human Pose Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9988--9997},
}
FlowTok: Flowing Seamlessly Across Text and Image Tokens: Ju He,

Qihang Yu,

Qihao Liu,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Ju and Yu, Qihang and Liu, Qihao and Chen, Liang-Chieh},
  title     = {{FlowTok}: Flowing Seamlessly Across Text and Image Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16629--16640},
}
PROGRESSOR: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement: Tewodros W. Ayalew,

Xiao Zhang,

Kevin Yuanbo Wu,

Tianchong Jiang,

Michael Maire,

Matthew R. Walter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ayalew_2025_ICCV,
  author    = {Ayalew, Tewodros W. and Zhang, Xiao and Wu, Kevin Yuanbo and Jiang, Tianchong and Maire, Michael and Walter, Matthew R.},
  title     = {{PROGRESSOR}: A Perceptually Guided Reward Estimator with Self-Supervised Online Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10297--10306},
}
AdaHuman: Animatable Detailed 3D Human Generation with Compositional Multiview Diffusion: Yangyi Huang,

Ye Yuan,

Xueting Li,

Jan Kautz,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yangyi and Yuan, Ye and Li, Xueting and Kautz, Jan and Iqbal, Umar},
  title     = {{AdaHuman}: Animatable Detailed {3D} Human Generation with Compositional Multiview Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13533--13543},
}
ReCamMaster: Camera-Controlled Generative Rendering from A Single Video: Jianhong Bai,

Menghan Xia,

Xiao Fu,

Xintao Wang,

Lianrui Mu,

Jinwen Cao,

Zuozhu Liu,

Haoji Hu,

Xiang Bai,

Pengfei Wan,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Jianhong and Xia, Menghan and Fu, Xiao and Wang, Xintao and Mu, Lianrui and Cao, Jinwen and Liu, Zuozhu and Hu, Haoji and Bai, Xiang and Wan, Pengfei and Zhang, Di},
  title     = {{ReCamMaster}: Camera-Controlled Generative Rendering from A Single Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14834--14844},
}
Learned Image Compression with Hierarchical Progressive Context Modeling: Yuqi Li,

Haotian Zhang,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuqi and Zhang, Haotian and Li, Li and Liu, Dong},
  title     = {Learned Image Compression with Hierarchical Progressive Context Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18834--18843},
}
Joint Self-Supervised Video Alignment and Action Segmentation: Ali Shah Ali,

Syed Ahmed Mahmood,

Mubin Saeed,

Andrey Konin,

M. Zeeshan Zia,

Quoc-Huy Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ali_2025_ICCV,
  author    = {Ali, Ali Shah and Mahmood, Syed Ahmed and Saeed, Mubin and Konin, Andrey and Zia, M. Zeeshan and Tran, Quoc-Huy},
  title     = {Joint Self-Supervised Video Alignment and Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10807--10818},
}
Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal LLMs: Yikang Zhou,

Tao Zhang,

Shilin Xu,

Shihao Chen,

Qianyu Zhou,

Yunhai Tong,

Shunping Ji,

Jiangning Zhang,

Lu Qi,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yikang and Zhang, Tao and Xu, Shilin and Chen, Shihao and Zhou, Qianyu and Tong, Yunhai and Ji, Shunping and Zhang, Jiangning and Qi, Lu and Li, Xiangtai},
  title     = {Are They the Same? Exploring Visual Correspondence Shortcomings of Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17663--17674},
}
Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization: Kyle Sargent,

Kyle Hsu,

Justin Johnson,

Li Fei-Fei,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargent_2025_ICCV,
  author    = {Sargent, Kyle and Hsu, Kyle and Johnson, Justin and Fei-Fei, Li and Wu, Jiajun},
  title     = {Flow to the Mode: Mode-Seeking Diffusion Autoencoders for State-of-the-Art Image Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19471--19481},
}
UniPhys: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control: Yan Wu,

Korrawe Karunratanakul,

Zhengyi Luo,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yan and Karunratanakul, Korrawe and Luo, Zhengyi and Tang, Siyu},
  title     = {{UniPhys}: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13214--13224},
}
SPADE: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning: Xin Hu,

Ke Qin,

Guiduo Duan,

Ming Li,

Yuan-Fang Li,

Tao He; [pdf]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xin and Qin, Ke and Duan, Guiduo and Li, Ming and Li, Yuan-Fang and He, Tao},
  title     = {{SPADE}: Spatial-Aware Denoising Network for Open-vocabulary Panoptic Scene Graph Generation with Long- and Local-range Context Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15562--15572},
}
FPEM: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching: Hui Li,

Xiaoyu Ren,

Hongjiu Yu,

Ying Chen,

Kai Li,

L Wang,

Xiongkuo Min,

Huiyu Duan,

Guangtao Zhai,

Xu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hui and Ren, Xiaoyu and Yu, Hongjiu and Chen, Ying and Li, Kai and Wang, L and Min, Xiongkuo and Duan, Huiyu and Zhai, Guangtao and Liu, Xu},
  title     = {{FPEM}: Face Prior Enhanced Facial Attractiveness Prediction for Live Videos with Face Retouching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11458--11468},
}
Magic Insert: Style-Aware Drag-and-Drop: Nataniel Ruiz,

Yuanzhen Li,

Neal Wadhwa,

Yael Pritch,

Michael Rubinstein,

David E. Jacobs,

Shlomi Fruchter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruiz_2025_ICCV,
  author    = {Ruiz, Nataniel and Li, Yuanzhen and Wadhwa, Neal and Pritch, Yael and Rubinstein, Michael and Jacobs, David E. and Fruchter, Shlomi},
  title     = {{Magic Insert}: Style-Aware Drag-and-Drop},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15971--15981},
}
HADES: Human Avatar with Dynamic Explicit Hair Strands: Zhanfeng Liao,

Hanzhang Tu,

Cheng Peng,

Hongwen Zhang,

Boyao Zhou,

Yebin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zhanfeng and Tu, Hanzhang and Peng, Cheng and Zhang, Hongwen and Zhou, Boyao and Liu, Yebin},
  title     = {{HADES}: Human Avatar with Dynamic Explicit Hair Strands},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12318--12327},
}
From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning: Le Zhuo,

Liangbing Zhao,

Sayak Paul,

Yue Liao,

Renrui Zhang,

Yi Xin,

Peng Gao,

Mohamed Elhoseiny,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuo_2025_ICCV,
  author    = {Zhuo, Le and Zhao, Liangbing and Paul, Sayak and Liao, Yue and Zhang, Renrui and Xin, Yi and Gao, Peng and Elhoseiny, Mohamed and Li, Hongsheng},
  title     = {From Reflection to Perfection: Scaling Inference-Time Optimization for Text-to-Image Diffusion Models via Reflection Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15329--15339},
}
FreeDance: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework: Yiwen Zhao,

Yang Wang,

Liting Wen,

Hengyuan Zhang,

Xingqun Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yiwen and Wang, Yang and Wen, Liting and Zhang, Hengyuan and Qi, Xingqun},
  title     = {{FreeDance}: Towards Harmonic Free-Number Group Dance Generation via a Unified Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10560--10569},
}
Beyond the Frame: Generating 360° Panoramic Videos from Perspective Videos: Rundong Luo,

Matthew Wallingford,

Ali Fahardi,

Noah Snavely,

Wei-Chiu Ma; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Rundong and Wallingford, Matthew and Fahardi, Ali and Snavely, Noah and Ma, Wei-Chiu},
  title     = {Beyond the Frame: Generating {$360^{\circ}$} Panoramic Videos from Perspective Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14336--14345},
}
Multi-turn Consistent Image Editing: Zijun Zhou,

Yingying Deng,

Xiangyu He,

Weiming Dong,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zijun and Deng, Yingying and He, Xiangyu and Dong, Weiming and Tang, Fan},
  title     = {Multi-turn Consistent Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15792--15801},
}
HUMOTO: A 4D Dataset of Mocap Human Object Interactions: Jiaxin Lu,

Chun-Hao Paul Huang,

Uttaran Bhattacharya,

Qixing Huang,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiaxin and Huang, Chun-Hao Paul and Bhattacharya, Uttaran and Huang, Qixing and Zhou, Yi},
  title     = {{HUMOTO}: A {4D} Dataset of Mocap Human Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10886--10897},
}
Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval: Ziwei Wang,

Sameera Ramasinghe,

Chenchen Xu,

Julien Monteil,

Loris Bazzani,

Thalaiyasingam Ajanthan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziwei and Ramasinghe, Sameera and Xu, Chenchen and Monteil, Julien and Bazzani, Loris and Ajanthan, Thalaiyasingam},
  title     = {Learning Visual Hierarchies in Hyperbolic Space for Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9924--9934},
}
AutoPrompt: Automated Red-Teaming of Text-to-Image Models via LLM-Driven Adversarial Prompts: Yufan Liu,

Wanqian Zhang,

Huashan Chen,

Lin Wang,

Xiaojun Jia,

Zheng Lin,

Weiping Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yufan and Zhang, Wanqian and Chen, Huashan and Wang, Lin and Jia, Xiaojun and Lin, Zheng and Wang, Weiping},
  title     = {{AutoPrompt}: Automated Red-Teaming of Text-to-Image Models via {LLM}-Driven Adversarial Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17557--17566},
}
Visual Interestingness Decoded: How GPT-4o Mirrors Human Interests: Fitim Abdullahu,

Helmut Grabner; [pdf] [supp]
[bibtex]
@InProceedings{Abdullahu_2025_ICCV,
  author    = {Abdullahu, Fitim and Grabner, Helmut},
  title     = {Visual Interestingness Decoded: How {GPT-4o} Mirrors Human Interests},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15350--15364},
}
Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring: Yufei Zhu,

Hao Chen,

Yongjian Deng,

Wei You; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yufei and Chen, Hao and Deng, Yongjian and You, Wei},
  title     = {Separation for Better Integration: Disentangling Edge and Motion in Event-based Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14732--14742},
}
Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video: Xiao Li,

Qi Chen,

Xiulian Peng,

Kai Yu,

Xie Chen,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiao and Chen, Qi and Peng, Xiulian and Yu, Kai and Chen, Xie and Lu, Yan},
  title     = {Bitrate-Controlled Diffusion for Disentangling Motion and Content in Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12904--12914},
}
Deeply Supervised Flow-Based Generative Models: Inkyu Shin,

Chenglin Yang,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Inkyu and Yang, Chenglin and Chen, Liang-Chieh},
  title     = {Deeply Supervised Flow-Based Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16535--16544},
}
Towards Efficient General Feature Prediction in Masked Skeleton Modeling: Shengkai Sun,

Zefan Zhang,

Jianfeng Dong,

Zhiyong Cheng,

Xiaojun Chang,

Meng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shengkai and Zhang, Zefan and Dong, Jianfeng and Cheng, Zhiyong and Chang, Xiaojun and Wang, Meng},
  title     = {Towards Efficient General Feature Prediction in Masked Skeleton Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12212--12221},
}
Timestep-Aware Diffusion Model for Extreme Image Rescaling: Ce Wang,

Zhenyu Hu,

Wanjie Sun,

Zhenzhong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ce and Hu, Zhenyu and Sun, Wanjie and Chen, Zhenzhong},
  title     = {Timestep-Aware Diffusion Model for Extreme Image Rescaling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15594--15603},
}
COVTrack: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion: Zekun Qian,

Ruize Han,

Zhixiang Wang,

Junhui Hou,

Wei Feng; [pdf]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Zekun and Han, Ruize and Wang, Zhixiang and Hou, Junhui and Feng, Wei},
  title     = {{COVTrack}: Continuous Open-Vocabulary Tracking via Adaptive Multi-Cue Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10054--10063},
}
DIVE: Taming DINO for Subject-Driven Video Editing: Yi Huang,

Wei Xiong,

He Zhang,

Chaoqi Chen,

Jianzhuang Liu,

Mingfu Yan,

Shifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yi and Xiong, Wei and Zhang, He and Chen, Chaoqi and Liu, Jianzhuang and Yan, Mingfu and Chen, Shifeng},
  title     = {{DIVE}: Taming {DINO} for Subject-Driven Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16004--16014},
}
GReg: Geometry-Aware Region Refinement for Sign Language Video Generation: Tongkai Shi,

Lianyu Hu,

Fanhua Shang,

Liqing Gao,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Tongkai and Hu, Lianyu and Shang, Fanhua and Gao, Liqing and Feng, Wei},
  title     = {{GReg}: Geometry-Aware Region Refinement for Sign Language Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16472--16481},
}
CarGait: Cross-Attention based Re-ranking for Gait recognition: Gavriel Habib,

Noa Barzilay,

Or Shimshi,

Rami Ben-Ari,

Nir Darshan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Habib_2025_ICCV,
  author    = {Habib, Gavriel and Barzilay, Noa and Shimshi, Or and Ben-Ari, Rami and Darshan, Nir},
  title     = {{CarGait}: Cross-Attention based Re-ranking for Gait recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11884--11894},
}
Text2VDM: Text to Vector Displacement Maps for Expressive and Interactive 3D Sculpting: Hengyu Meng,

Duotun Wang,

Zhijing Shao,

Ligang Liu,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Hengyu and Wang, Duotun and Shao, Zhijing and Liu, Ligang and Wang, Zeyu},
  title     = {{Text2VDM}: Text to Vector Displacement Maps for Expressive and Interactive {3D} Sculpting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16882--16892},
}
DiffVSR: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations: Xiaohui Li,

Yihao Liu,

Shuo Cao,

Ziyan Chen,

Shaobin Zhuang,

Xiangyu Chen,

Yinan He,

Yi Wang,

Yu Qiao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaohui and Liu, Yihao and Cao, Shuo and Chen, Ziyan and Zhuang, Shaobin and Chen, Xiangyu and He, Yinan and Wang, Yi and Qiao, Yu},
  title     = {{DiffVSR}: Revealing an Effective Recipe for Taming Robust Video Super-Resolution Against Complex Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15319--15328},
}
Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening: Hebaixu Wang,

Jiayi Ma; [pdf]
[bibtex]
% NOTE(review): citation key Wang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hebaixu and Ma, Jiayi},
  title     = {Deep Adaptive Unfolded Network via Spatial Morphology Stripping and Spectral Filtration for Pan-sharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10730--10740},
}
A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition: Jie Zhu,

Yiyang Su,

Minchul Kim,

Anil Jain,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jie and Su, Yiyang and Kim, Minchul and Jain, Anil and Liu, Xiaoming},
  title     = {A Quality-Guided Mixture of Score-Fusion Experts Framework for Human Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13076--13086},
}
MOVE: Motion-Guided Few-Shot Video Object Segmentation: Kaining Ying,

Hengrui Hu,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Kaining and Hu, Hengrui and Ding, Henghui},
  title     = {{MOVE}: Motion-Guided Few-Shot Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11632--11642},
}
Randomized Autoregressive Visual Generation: Qihang Yu,

Ju He,

Xueqing Deng,

Xiaohui Shen,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Qihang and He, Ju and Deng, Xueqing and Shen, Xiaohui and Chen, Liang-Chieh},
  title     = {Randomized Autoregressive Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18431--18441},
}
Bootstrap3D: Improving Multi-view Diffusion Model with Synthetic Data: Zeyi Sun,

Tong Wu,

Pan Zhang,

Yuhang Zang,

Xiaoyi Dong,

Yuanjun Xiong,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Sun_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zeyi and Wu, Tong and Zhang, Pan and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{Bootstrap3D}: Improving Multi-view Diffusion Model with Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15714--15726},
}
DICE: Staleness-Centric Optimizations for Parallel Diffusion MoE Inference: Jiajun Luo,

Lizhuo Luo,

Jianru Xu,

Jiajun Song,

Rongwei Lu,

Chen Tang,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Jiajun and Luo, Lizhuo and Xu, Jianru and Song, Jiajun and Lu, Rongwei and Tang, Chen and Wang, Zhi},
  title     = {{DICE}: Staleness-Centric Optimizations for Parallel Diffusion {MoE} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15481--15490},
}
SHeaP: Self-Supervised Head Geometry Predictor Learned via 2D Gaussians: Liam Schoneveld,

Zhe Chen,

Davide Davoli,

Jiapeng Tang,

Saimon Terazawa,

Ko Nishino,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schoneveld_2025_ICCV,
  author    = {Schoneveld, Liam and Chen, Zhe and Davoli, Davide and Tang, Jiapeng and Terazawa, Saimon and Nishino, Ko and Nie{\ss}ner, Matthias},
  title     = {{SHeaP}: Self-Supervised Head Geometry Predictor Learned via {2D} {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14162--14172},
}
Pretrained Reversible Generation as Unsupervised Visual Representation Learning: Rongkun Xue,

Jinouwen Zhang,

Yazhe Niu,

Dazhong Shen,

Bingqi Ma,

Yu Liu,

Jing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Rongkun and Zhang, Jinouwen and Niu, Yazhe and Shen, Dazhong and Ma, Bingqi and Liu, Yu and Yang, Jing},
  title     = {Pretrained Reversible Generation as Unsupervised Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19216--19226},
}
CompSlider: Compositional Slider for Disentangled Multiple-Attribute Image Generation: Zixin Zhu,

Kevin Duarte,

Mamshad Nayeem Rizve,

Chengyuan Xu,

Ratheesh Kalarot,

Junsong Yuan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Zixin and Duarte, Kevin and Rizve, Mamshad Nayeem and Xu, Chengyuan and Kalarot, Ratheesh and Yuan, Junsong},
  title     = {{CompSlider}: Compositional Slider for Disentangled Multiple-Attribute Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16682--16691},
}
InteractAvatar: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable Gaussians: Kefan Chen,

Sreyas Mohan,

Justin Theiss,

Sergiu Oprea,

Srinath Sridhar,

Aayush Prakash; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kefan and Mohan, Sreyas and Theiss, Justin and Oprea, Sergiu and Sridhar, Srinath and Prakash, Aayush},
  title     = {{InteractAvatar}: Modeling Hand-Face Interaction in Photorealistic Avatars with Deformable {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10410--10420},
}
DC-ControlNet: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models: Hongji Yang,

Wencheng Han,

Yucheng Zhou,

Jianbing Shen; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Yang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hongji and Han, Wencheng and Zhou, Yucheng and Shen, Jianbing},
  title     = {{DC-ControlNet}: Decoupling Inter- and Intra-Element Conditions in Image Generation with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19065--19074},
}
EfficientMT: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models: Yufei Cai,

Hu Han,

Yuxiang Wei,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yufei and Han, Hu and Wei, Yuxiang and Shan, Shiguang and Chen, Xilin},
  title     = {{EfficientMT}: Efficient Temporal Adaptation for Motion Transfer in Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10592--10601},
}
Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions: Dong Li,

Chunhui Luo,

Yuanfei Bao,

Gang Yang,

Jie Xiao,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Li_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Dong and Luo, Chunhui and Bao, Yuanfei and Yang, Gang and Xiao, Jie and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Enhanced Pansharpening via Quaternion Spatial-Spectral Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10908--10918},
}
Learning to Generalize without Bias for Open-Vocabulary Action Recognition: Yating Yu,

Congqi Cao,

Yifan Zhang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yating and Cao, Congqi and Zhang, Yifan and Zhang, Yanning},
  title     = {Learning to Generalize without Bias for Open-Vocabulary Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12800--12810},
}
Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions: Thomas Dagès,

Michael Lindenbaum,

Alfred M. Bruckstein; [pdf] [supp]
[bibtex]
@InProceedings{Dages_2025_ICCV,
  author    = {Dag{\`e}s, Thomas and Lindenbaum, Michael and Bruckstein, Alfred M.},
  title     = {Metric Convolutions: A Unifying Theory to Adaptive Image Convolutions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13974--13984},
}
SDMatte: Grafting Diffusion Models for Interactive Matting: Longfei Huang,

Yu Liang,

Hao Zhang,

Jinwei Chen,

Wei Dong,

Lunde Chen,

Wanyu Liu,

Bo Li,

Peng-Tao Jiang; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Longfei and Liang, Yu and Zhang, Hao and Chen, Jinwei and Dong, Wei and Chen, Lunde and Liu, Wanyu and Li, Bo and Jiang, Peng-Tao},
  title     = {{SDMatte}: Grafting Diffusion Models for Interactive Matting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15229--15239},
}
DOLLAR: Few-Step Video Generation via Distillation and Latent Reward Optimization: Zihan Ding,

Chi Jin,

Difan Liu,

Haitian Zheng,

Krishna Kumar Singh,

Qiang Zhang,

Yan Kang,

Zhe Lin,

Yuchen Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Ding_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Zihan and Jin, Chi and Liu, Difan and Zheng, Haitian and Singh, Krishna Kumar and Zhang, Qiang and Kang, Yan and Lin, Zhe and Liu, Yuchen},
  title     = {{DOLLAR}: Few-Step Video Generation via Distillation and Latent Reward Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17961--17971},
}
Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers: Yunshan Zhong,

Yuyao Zhou,

Yuxin Zhang,

Wanchen Sui,

Shen Li,

Yong Li,

Fei Chao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yunshan and Zhou, Yuyao and Zhang, Yuxin and Sui, Wanchen and Li, Shen and Li, Yong and Chao, Fei and Ji, Rongrong},
  title     = {Semantic Alignment and Reinforcement for Data-Free Quantization of Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12479--12490},
}
StreamDiffusion: A Pipeline-level Solution for Real-Time Interactive Generation: Akio Kodaira,

Chenfeng Xu,

Toshiki Hazama,

Takanori Yoshimoto,

Kohei Ohno,

Shogo Mitsuhori,

Soichi Sugano,

Hanying Cho,

Zhijian Liu,

Masayoshi Tomizuka,

Kurt Keutzer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kodaira_2025_ICCV,
  author    = {Kodaira, Akio and Xu, Chenfeng and Hazama, Toshiki and Yoshimoto, Takanori and Ohno, Kohei and Mitsuhori, Shogo and Sugano, Soichi and Cho, Hanying and Liu, Zhijian and Tomizuka, Masayoshi and Keutzer, Kurt},
  title     = {{StreamDiffusion}: A Pipeline-level Solution for Real-Time Interactive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12371--12380},
}
Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model: Xuan Han,

Yihao Zhao,

Yanhao Ge,

Mingyu You; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Han_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Han_2025_ICCV,
  author    = {Han, Xuan and Zhao, Yihao and Ge, Yanhao and You, Mingyu},
  title     = {Toward Better Out-painting: Improving the Image Composition with Initialization Policy Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16938--16947},
}
StreamMind: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition: Xin Ding,

Hao Wu,

Yifan Yang,

Shiqi Jiang,

Qianxi Zhang,

Donglin Bai,

Zhibo Chen,

Ting Cao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Ding_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Xin and Wu, Hao and Yang, Yifan and Jiang, Shiqi and Zhang, Qianxi and Bai, Donglin and Chen, Zhibo and Cao, Ting},
  title     = {{StreamMind}: Unlocking Full Frame Rate Streaming Video Dialogue through Event-Gated Cognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13448--13459},
}
Discovering Divergent Representations between Text-to-Image Models: Lisa Dunlap,

Joseph E. Gonzalez,

Trevor Darrell,

Fabian Caba Heilbron,

Josef Sivic,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dunlap_2025_ICCV,
  author    = {Dunlap, Lisa and Gonzalez, Joseph E. and Darrell, Trevor and Heilbron, Fabian Caba and Sivic, Josef and Russell, Bryan},
  title     = {Discovering Divergent Representations between Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17516--17525},
}
Spectral Image Tokenizer: Carlos Esteves,

Mohammed Suhail,

Ameesh Makadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Esteves_2025_ICCV,
  author    = {Esteves, Carlos and Suhail, Mohammed and Makadia, Ameesh},
  title     = {Spectral Image Tokenizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17181--17190},
}
GameFactory: Creating New Games with Generative Interactive Videos: Jiwen Yu,

Yiran Qin,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jiwen and Qin, Yiran and Wang, Xintao and Wan, Pengfei and Zhang, Di and Liu, Xihui},
  title     = {{GameFactory}: Creating New Games with Generative Interactive Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11590--11599},
}
Understanding Co-speech Gestures in-the-wild: Sindhu B Hegde,

K R Prajwal,

Taein Kwon,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hegde_2025_ICCV,
  author    = {Hegde, Sindhu B and Prajwal, K R and Kwon, Taein and Zisserman, Andrew},
  title     = {Understanding Co-speech Gestures in-the-wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9977--9987},
}
Tune-Your-Style: Intensity-tunable 3D Style Transfer with Gaussian Splatting: Yian Zhao,

Rushi Ye,

Ruochong Zheng,

Zesen Cheng,

Chaoran Feng,

Jiashu Yang,

Pengchong Qiao,

Chang Liu,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yian and Ye, Rushi and Zheng, Ruochong and Cheng, Zesen and Feng, Chaoran and Yang, Jiashu and Qiao, Pengchong and Liu, Chang and Chen, Jie},
  title     = {{Tune-Your-Style}: Intensity-tunable {3D} Style Transfer with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19032--19042},
}
Stylized-Face: A Million-level Stylized Face Dataset for Face Recognition: Zhengyuan Peng,

Jianqing Xu,

Yuge Huang,

Jinkun Hao,

Shouhong Ding,

Zhizhong Zhang,

Xin Tan,

Lizhuang Ma; [pdf]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Zhengyuan and Xu, Jianqing and Huang, Yuge and Hao, Jinkun and Ding, Shouhong and Zhang, Zhizhong and Tan, Xin and Ma, Lizhuang},
  title     = {{Stylized-Face}: A Million-level Stylized Face Dataset for Face Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13053--13064},
}
GestureHYDRA: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation: Quanwei Yang,

Luying Huang,

Kaisiyuan Wang,

Jiazhi Guan,

Shengyi He,

Fengguo Li,

Hang Zhou,

Lingyun Yu,

Yingying Li,

Haocheng Feng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Quanwei and Huang, Luying and Wang, Kaisiyuan and Guan, Jiazhi and He, Shengyi and Li, Fengguo and Zhou, Hang and Yu, Lingyun and Li, Yingying and Feng, Haocheng and Xie, Hongtao},
  title     = {{GestureHYDRA}: Semantic Co-speech Gesture Synthesis via Hybrid Modality Diffusion Transformer and Cascaded-Synchronized Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12615--12625},
}
HUST: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization: Zimin Ran,

Xingyu Ren,

Xiang An,

Kaicheng Yang,

Ziyong Feng,

Jing Yang,

Rolandos Alexandros Potamias,

Linchao Zhu,

Jiankang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Ran_2025_ICCV,
  author    = {Ran, Zimin and Ren, Xingyu and An, Xiang and Yang, Kaicheng and Feng, Ziyong and Yang, Jing and Potamias, Rolandos Alexandros and Zhu, Linchao and Deng, Jiankang},
  title     = {{HUST}: High-Fidelity Unbiased Skin Tone Estimation via Texture Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13523--13532},
}
DLFR-Gen: Diffusion-based Video Generation with Dynamic Latent Frame Rate: Zhihang Yuan,

Rui Xie,

Yuzhang Shang,

Hanling Zhang,

Siyuan Wang,

Shengen Yan,

Guohao Dai,

Yu Wang; [pdf]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhihang and Xie, Rui and Shang, Yuzhang and Zhang, Hanling and Wang, Siyuan and Yan, Shengen and Dai, Guohao and Wang, Yu},
  title     = {{DLFR-Gen}: Diffusion-based Video Generation with Dynamic Latent Frame Rate},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16410--16419},
}
DiffSim: Taming Diffusion Models for Evaluating Visual Similarity: Yiren Song,

Xiaokang Liu,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yiren and Liu, Xiaokang and Shou, Mike Zheng},
  title     = {{DiffSim}: Taming Diffusion Models for Evaluating Visual Similarity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16904--16915},
}
iManip: Skill-Incremental Learning for Robotic Manipulation: Zexin Zheng,

Jia-Feng Cai,

Xiao-Ming Wu,

Yi-Lin Wei,

Yu-Ming Tang,

Ancong Wu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zexin and Cai, Jia-Feng and Wu, Xiao-Ming and Wei, Yi-Lin and Tang, Yu-Ming and Wu, Ancong and Zheng, Wei-Shi},
  title     = {{iManip}: Skill-Incremental Learning for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13890--13900},
}
VideoSetDiff: Identifying and Reasoning Similarities and Differences in Similar Videos: Yue Qiu,

Yanjun Sun,

Takuma Yagi,

Shusaku Egami,

Natsuki Miyata,

Ken Fukuda,

Kensho Hara,

Ryusuke Sagawa; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Yue and Sun, Yanjun and Yagi, Takuma and Egami, Shusaku and Miyata, Natsuki and Fukuda, Ken and Hara, Kensho and Sagawa, Ryusuke},
  title     = {{VideoSetDiff}: Identifying and Reasoning Similarities and Differences in Similar Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12242--12252},
}
LUSD: Localized Update Score Distillation for Text-Guided Image Editing: Worameth Chinchuthakun,

Tossaporn Saengja,

Nontawat Tritrong,

Pitchaporn Rewatbowornwong,

Pramook Khungurn,

Supasorn Suwajanakorn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chinchuthakun_2025_ICCV,
  author    = {Chinchuthakun, Worameth and Saengja, Tossaporn and Tritrong, Nontawat and Rewatbowornwong, Pitchaporn and Khungurn, Pramook and Suwajanakorn, Supasorn},
  title     = {{LUSD}: Localized Update Score Distillation for Text-Guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15298--15307},
}
ChartCap: Mitigating Hallucination of Dense Chart Captioning: Junyoung Lim,

Jaewoo Ahn,

Gunhee Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Junyoung and Ahn, Jaewoo and Kim, Gunhee},
  title     = {{ChartCap}: Mitigating Hallucination of Dense Chart Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13171--13182},
}
CuRe: Cultural Gaps in the Long Tail of Text-to-Image Systems: Aniket Rege,

Zinnia Nie,

Mahesh Ramesh,

Unmesh Raskar,

Zhuoran Yu,

Aditya Kusupati,

Yong Jae Lee,

Ramya Korlakai Vinayak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rege_2025_ICCV,
  author    = {Rege, Aniket and Nie, Zinnia and Ramesh, Mahesh and Raskar, Unmesh and Yu, Zhuoran and Kusupati, Aditya and Lee, Yong Jae and Vinayak, Ramya Korlakai},
  title     = {{CuRe}: Cultural Gaps in the Long Tail of Text-to-Image Systems},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15680--15691},
}
CoMPaSS: Enhancing Spatial Understanding in Text-to-Image Diffusion Models: Gaoyang Zhang,

Bingtao Fu,

Qingnan Fan,

Qi Zhang,

Runxing Liu,

Hong Gu,

Huaqi Zhang,

Xinguo Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Gaoyang and Fu, Bingtao and Fan, Qingnan and Zhang, Qi and Liu, Runxing and Gu, Hong and Zhang, Huaqi and Liu, Xinguo},
  title     = {{CoMPaSS}: Enhancing Spatial Understanding in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15253--15265},
}
Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion: Jiawei Liang,

Siyuan Liang,

Tianrui Lou,

Ming Zhang,

Wenjin Li,

Dunqiu Fan,

Xiaochun Cao; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Jiawei and Liang, Siyuan and Lou, Tianrui and Zhang, Ming and Li, Wenjin and Fan, Dunqiu and Cao, Xiaochun},
  title     = {Gradient-Reweighted Adversarial Camouflage for Physical Object Detection Evasion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13880--13889},
}
ASCENT: Annotation-free Self-supervised Contrastive Embeddings for 3D Neuron Tracking in Fluorescence Microscopy: Haejun Han,

Hang Lu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Han_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Han_2025_ICCV,
  author    = {Han, Haejun and Lu, Hang},
  title     = {{ASCENT}: Annotation-free Self-supervised Contrastive Embeddings for {3D} Neuron Tracking in Fluorescence Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14676--14687},
}
ViLU: Learning Vision-Language Uncertainties for Failure Prediction: Marc Lafon,

Yannis Karmim,

Julio Silva-Rodríguez,

Paul Couairon,

Clément Rambour,

Raphael Fournier-Sniehotta,

Ismail Ben Ayed,

Jose Dolz,

Nicolas Thome; [pdf] [supp]
[bibtex]
@InProceedings{Lafon_2025_ICCV,
  author    = {Lafon, Marc and Karmim, Yannis and Silva-Rodr{\'\i}guez, Julio and Couairon, Paul and Rambour, Cl{\'e}ment and Fournier-Sniehotta, Raphael and Ben Ayed, Ismail and Dolz, Jose and Thome, Nicolas},
  title     = {{ViLU}: Learning Vision-Language Uncertainties for Failure Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17807--17817},
}
Perception-as-Control: Fine-grained Controllable Image Animation with 3D-aware Motion Representation: Yingjie Chen,

Yifang Men,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Chen_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingjie and Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng},
  title     = {{Perception-as-Control}: Fine-grained Controllable Image Animation with {3D-aware} Motion Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14380--14389},
}
SA-LUT: Spatial Adaptive 4D Look-Up Table for Photorealistic Style Transfer: Zerui Gong,

Zhonghua Wu,

Qingyi Tao,

Qinyue Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Zerui and Wu, Zhonghua and Tao, Qingyi and Li, Qinyue and Loy, Chen Change},
  title     = {{SA-LUT}: Spatial Adaptive {4D} Look-Up Table for Photorealistic Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18294--18303},
}
Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation: Congyi Fan,

Jian Guan,

Xuanjia Zhao,

Dongli Xu,

Youtian Lin,

Tong Ye,

Pengming Feng,

Haiwei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Congyi and Guan, Jian and Zhao, Xuanjia and Xu, Dongli and Lin, Youtian and Ye, Tong and Feng, Pengming and Pan, Haiwei},
  title     = {Align Your Rhythm: Generating Highly Aligned Dance Poses with Gating-Enhanced Rhythm-Aware Feature Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13193--13202},
}
Stable Score Distillation: Haiming Zhu,

Yangyang Xu,

Chenshu Xu,

Tingrui Shen,

Wenxi Liu,

Yong Du,

Jun Yu,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhu_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haiming and Xu, Yangyang and Xu, Chenshu and Shen, Tingrui and Liu, Wenxi and Du, Yong and Yu, Jun and He, Shengfeng},
  title     = {Stable Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16597--16606},
}
FoundIR: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration: Hao Li,

Xiang Chen,

Jiangxin Dong,

Jinhui Tang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hao and Chen, Xiang and Dong, Jiangxin and Tang, Jinhui and Pan, Jinshan},
  title     = {{FoundIR}: Unleashing Million-scale Training Data to Advance Foundation Models for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12626--12636},
}
Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering: Shanlin Sun,

Yifan Wang,

Hanwen Zhang,

Yifeng Xiong,

Qin Ren,

Ruogu Fang,

Xiaohui Xie,

Chenyu You; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Sun_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shanlin and Wang, Yifan and Zhang, Hanwen and Xiong, Yifeng and Ren, Qin and Fang, Ruogu and Xie, Xiaohui and You, Chenyu},
  title     = {Ouroboros: Single-step Diffusion Models for Cycle-consistent Forward and Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10386--10397},
}
Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer: Yuan-Fu Yang,

Hsiu-Hui Hsiao; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Yang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yuan-Fu and Hsiao, Hsiu-Hui},
  title     = {Photolithography Overlay Map Generation with Implicit Knowledge Distillation Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15288--15297},
}
VQ-VLA: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers: Yating Wang,

Haoyi Zhu,

Mingyu Liu,

Jiange Yang,

Hao-Shu Fang,

Tong He; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yating and Zhu, Haoyi and Liu, Mingyu and Yang, Jiange and Fang, Hao-Shu and He, Tong},
  title     = {{VQ-VLA}: Improving Vision-Language-Action Models via Scaling Vector-Quantized Action Tokenizers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11089--11099},
}
UDC-VIT: A Real-World Video Dataset for Under-Display Cameras: Kyusu Ahn,

JiSoo Kim,

Sangik Lee,

HyunGyu Lee,

Byeonghyun Ko,

Chanwoo Park,

Jaejin Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ahn_2025_ICCV,
  author    = {Ahn, Kyusu and Kim, JiSoo and Lee, Sangik and Lee, HyunGyu and Ko, Byeonghyun and Park, Chanwoo and Lee, Jaejin},
  title     = {{UDC-VIT}: A Real-World Video Dataset for Under-Display Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10950--10960},
}
Gait-X: Exploring X modality for Generalized Gait Recognition: Zengbin Wang,

Saihui Hou,

Junjie Li,

Xu Liu,

Chunshui Cao,

Yongzhen Huang,

Siye Wang,

Man Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zengbin and Hou, Saihui and Li, Junjie and Liu, Xu and Cao, Chunshui and Huang, Yongzhen and Wang, Siye and Zhang, Man},
  title     = {{Gait-X}: Exploring {X} modality for Generalized Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13259--13269},
}
D3QE: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection: Yanran Zhang,

Bingyao Yu,

Yu Zheng,

Wenzhao Zheng,

Yueqi Duan,

Lei Chen,

Jie Zhou,

Jiwen Lu; [pdf]
[bibtex]
% NOTE(review): citation key Zhang_2025_ICCV is reused by other entries in this listing; make it unique before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yanran and Yu, Bingyao and Zheng, Yu and Zheng, Wenzhao and Duan, Yueqi and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {{D3QE}: Learning Discrete Distribution Discrepancy-aware Quantization Error for Autoregressive-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16292--16301},
}
StrandHead: Text to Hair-Disentangled 3D Head Avatars Using Human-Centric Priors: Xiaokun Sun,

Zeyu Cai,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xiaokun and Cai, Zeyu and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
  title     = {{StrandHead}: Text to Hair-Disentangled {3D} Head Avatars Using Human-Centric Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13393--13404},
}
Penalizing Boundary Activation for Object Completeness in Diffusion Models: Haoyang Xu,

Tianhao Zhao,

Sibei Yang,

Yutian Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Haoyang and Zhao, Tianhao and Yang, Sibei and Lin, Yutian},
  title     = {Penalizing Boundary Activation for Object Completeness in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14962--14972},
}
DMQ: Dissecting Outliers of Diffusion Models for Post-Training Quantization: Dongyeun Lee,

Jiwan Hur,

Hyounguk Shon,

Jae Young Lee,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Dongyeun and Hur, Jiwan and Shon, Hyounguk and Lee, Jae Young and Kim, Junmo},
  title     = {{DMQ}: Dissecting Outliers of Diffusion Models for Post-Training Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18510--18520},
}
QK-Edit: Revisiting Attention-based Injection in MM-DiT for Image and Video Editing: Tiancheng Shen,

Zilong Huang,

Xiangtai Li,

Zhijie Lin,

Jiyang Liu,

Yitong Wang,

Jiashi Feng,

Ming-Hsuan Yang,

Jun Hao Liew; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Tiancheng and Huang, Zilong and Li, Xiangtai and Lin, Zhijie and Liu, Jiyang and Wang, Yitong and Feng, Jiashi and Yang, Ming-Hsuan and Liew, Jun Hao},
  title     = {{QK-Edit}: Revisiting Attention-based Injection in {MM-DiT} for Image and Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19043--19053},
}
Visual Chronicles: Using Multimodal LLMs to Analyze Massive Collections of Images: Boyang Deng,

Songyou Peng,

Kyle Genova,

Gordon Wetzstein,

Noah Snavely,

Leonidas Guibas,

Thomas Funkhouser; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Boyang and Peng, Songyou and Genova, Kyle and Wetzstein, Gordon and Snavely, Noah and Guibas, Leonidas and Funkhouser, Thomas},
  title     = {Visual Chronicles: Using Multimodal {LLMs} to Analyze Massive Collections of Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12769--12778},
}
Decoding Correlation-Induced Misalignment in the Stable Diffusion Workflow for Text-to-Image Generation: Yunze Tong,

Fengda Zhang,

Didi Zhu,

Jun Xiao,

Kun Kuang; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Yunze and Zhang, Fengda and Zhu, Didi and Xiao, Jun and Kuang, Kun},
  title     = {Decoding Correlation-Induced Misalignment in the {Stable Diffusion} Workflow for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18187--18196},
}
Neuromanifold-Regularized KANs for Shape-fair Feature Representations: Mazlum Ferhat Arslan,

Weihong Guo,

Shuo Li; [pdf] [supp]
[bibtex]
@InProceedings{Arslan_2025_ICCV,
  author    = {Arslan, Mazlum Ferhat and Guo, Weihong and Li, Shuo},
  title     = {Neuromanifold-Regularized {KANs} for Shape-fair Feature Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12790--12799},
}
Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation: Yukuan Min,

Muli Yang,

Jinhao Zhang,

Yuxuan Wang,

Aming Wu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Min_2025_ICCV,
  author    = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Wu, Aming and Deng, Cheng},
  title     = {Vision-Language Interactive Relation Mining for Open-Vocabulary Scene Graph Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16755--16764},
}
Continuous-Time Human Motion Field from Event Cameras: Ziyun Wang,

Ruijun Zhang,

Zi-Yan Liu,

Yufu Wang,

Kostas Daniilidis; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyun and Zhang, Ruijun and Liu, Zi-Yan and Wang, Yufu and Daniilidis, Kostas},
  title     = {Continuous-Time Human Motion Field from Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11502--11512},
}
MagicMirror: ID-Preserved Video Generation in Video Diffusion Transformers: Yuechen Zhang,

Yaoyang Liu,

Bin Xia,

Bohao Peng,

Zexin Yan,

Eric Lo,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuechen and Liu, Yaoyang and Xia, Bin and Peng, Bohao and Yan, Zexin and Lo, Eric and Jia, Jiaya},
  title     = {{MagicMirror}: {ID}-Preserved Video Generation in Video Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14464--14474},
}
Unified Multimodal Understanding via Byte-Pair Visual Encoding: Wanpeng Zhang,

Yicheng Feng,

Hao Luo,

Yijiang Li,

Zihao Yue,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wanpeng and Feng, Yicheng and Luo, Hao and Li, Yijiang and Yue, Zihao and Zheng, Sipeng and Lu, Zongqing},
  title     = {Unified Multimodal Understanding via Byte-Pair Visual Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12976--12986},
}
Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement: Junyu Lou,

Xiaorui Zhao,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Junyu and Zhao, Xiaorui and Shi, Kexuan and Gu, Shuhang},
  title     = {Learning Pixel-adaptive Multi-layer Perceptrons for Real-time Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14095--14105},
}
DuoLoRA: Cycle-consistent and Rank-disentangled Content-Style Personalization: Aniket Roy,

Shubhankar Borse,

Shreya Kadambi,

Debasmit Das,

Shweta Mahajan,

Risheek Garrepalli,

Hyojin Park,

Ankita Nayak,

Rama Chellappa,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roy_2025_ICCV,
  author    = {Roy, Aniket and Borse, Shubhankar and Kadambi, Shreya and Das, Debasmit and Mahajan, Shweta and Garrepalli, Risheek and Park, Hyojin and Nayak, Ankita and Chellappa, Rama and Hayat, Munawar and Porikli, Fatih},
  title     = {{DuoLoRA}: Cycle-consistent and Rank-disentangled Content-Style Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15395--15404},
}
F-Bench: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration: Lu Liu,

Huiyu Duan,

Qiang Hu,

Liu Yang,

Chunlei Cai,

Tianxiao Ye,

Huayu Liu,

Xiaoyun Zhang,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lu and Duan, Huiyu and Hu, Qiang and Yang, Liu and Cai, Chunlei and Ye, Tianxiao and Liu, Huayu and Zhang, Xiaoyun and Zhai, Guangtao},
  title     = {{F-Bench}: Rethinking Human Preference Evaluation Metrics for Benchmarking Face Generation, Customization, and Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10982--10994},
}
GFPack++: Attention-Driven Gradient Fields for Optimizing 2D Irregular Packing: Tianyang Xue,

Lin Lu,

Yang Liu,

Mingdong Wu,

Hao Dong,

Yanbin Zhang,

Renmin Han,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Tianyang and Lu, Lin and Liu, Yang and Wu, Mingdong and Dong, Hao and Zhang, Yanbin and Han, Renmin and Chen, Baoquan},
  title     = {{GFPack++}: Attention-Driven Gradient Fields for Optimizing {2D} Irregular Packing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18014--18023},
}
PLA: Prompt Learning Attack against Text-to-Image Generative Models: Xinqi Lyu,

Yihao Liu,

Yanjie Li,

Bin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Xinqi and Liu, Yihao and Li, Yanjie and Xiao, Bin},
  title     = {{PLA}: Prompt Learning Attack against Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16851--16860},
}
RoboFactory: Exploring Embodied Agent Collaboration with Compositional Constraints: Yiran Qin,

Li Kang,

Xiufeng Song,

Zhenfei Yin,

Xiaohong Liu,

Xihui Liu,

Ruimao Zhang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yiran and Kang, Li and Song, Xiufeng and Yin, Zhenfei and Liu, Xiaohong and Liu, Xihui and Zhang, Ruimao and Bai, Lei},
  title     = {{RoboFactory}: Exploring Embodied Agent Collaboration with Compositional Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10075--10085},
}
HypDAE: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation: Lingxiao Li,

Kaixuan Fan,

Boqing Gong,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Lingxiao and Fan, Kaixuan and Gong, Boqing and Yue, Xiangyu},
  title     = {{HypDAE}: Hyperbolic Diffusion Autoencoders for Hierarchical Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17119--17128},
}
ShadowHack: Hacking Shadows via Luminance-Color Divide and Conquer: Jin Hu,

Mingjia Li,

Xiaojie Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Jin and Li, Mingjia and Guo, Xiaojie},
  title     = {{ShadowHack}: Hacking Shadows via Luminance-Color Divide and Conquer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11403--11413},
}
REPA-E: Unlocking VAE for End-to-End Tuning of Latent Diffusion Transformers: Xingjian Leng,

Jaskirat Singh,

Yunzhong Hou,

Zhenchang Xing,

Saining Xie,

Liang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Xingjian and Singh, Jaskirat and Hou, Yunzhong and Xing, Zhenchang and Xie, Saining and Zheng, Liang},
  title     = {{REPA-E}: Unlocking {VAE} for End-to-End Tuning of Latent Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18262--18272},
}
RoBridge: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation: Kaidong Zhang,

Rongtao Xu,

Pengzhen Ren,

Junfan Lin,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kaidong and Xu, Rongtao and Ren, Pengzhen and Lin, Junfan and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoBridge}: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14590--14601},
}
PersonaCraft: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware 3D-Conditioned Diffusion: Gwanghyun Kim,

Suh Yoon Jeon,

Seunggyu Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Gwanghyun and Jeon, Suh Yoon and Lee, Seunggyu and Chun, Se Young},
  title     = {{PersonaCraft}: Personalized and Controllable Full-Body Multi-Human Scene Generation Using Occlusion-Aware {3D}-Conditioned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12034--12044},
}
Controllable and Expressive One-Shot Video Head Swapping: Chaonan Ji,

Jinwei Qi,

Peng Zhang,

Bang Zhang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Chaonan and Qi, Jinwei and Zhang, Peng and Zhang, Bang and Bo, Liefeng},
  title     = {Controllable and Expressive One-Shot Video Head Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10239--10250},
}
Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization: Ashutosh Anshul,

Shreyas Gopal,

Deepu Rajan,

Eng Siong Chng; [pdf] [supp]
[bibtex]
@InProceedings{Anshul_2025_ICCV,
  author    = {Anshul, Ashutosh and Gopal, Shreyas and Rajan, Deepu and Chng, Eng Siong},
  title     = {Intra-modal and Cross-modal Synchronization for Audio-visual Deepfake Detection and Temporal Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13826--13836},
}
YOLO-Count: Differentiable Object Counting for Text-to-Image Generation: Guanning Zeng,

Xiang Zhang,

Zirui Wang,

Haiyang Xu,

Zeyuan Chen,

Bingnan Li,

Zhuowen Tu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Guanning and Zhang, Xiang and Wang, Zirui and Xu, Haiyang and Chen, Zeyuan and Li, Bingnan and Tu, Zhuowen},
  title     = {{YOLO-Count}: Differentiable Object Counting for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16765--16775},
}
DCT-Shield: A Robust Frequency Domain Defense against Malicious Image Editing: Aniruddha Bala,

Rohit Chowdhury,

Rohan Jaiswal,

Siddharth Roheda; [pdf] [supp]
[bibtex]
@InProceedings{Bala_2025_ICCV,
  author    = {Bala, Aniruddha and Chowdhury, Rohit and Jaiswal, Rohan and Roheda, Siddharth},
  title     = {{DCT-Shield}: A Robust Frequency Domain Defense against Malicious Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18876--18884},
}
Anti-Tamper Protection for Unauthorized Individual Image Generation: Zelin Li,

Ruohan Zong,

Yifan Liu,

Ruichen Yao,

Yaokun Liu,

Yang Zhang,

Dong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zelin and Zong, Ruohan and Liu, Yifan and Yao, Ruichen and Liu, Yaokun and Zhang, Yang and Wang, Dong},
  title     = {Anti-Tamper Protection for Unauthorized Individual Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15501--15510},
}
EgoAdapt: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception: Sanjoy Chowdhury,

Subrata Biswas,

Sayan Nag,

Tushar Nagarajan,

Calvin Murdock,

Ishwarya Ananthabhotla,

Yijun Qian,

Vamsi Krishna Ithapu,

Dinesh Manocha,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Biswas, Subrata and Nag, Sayan and Nagarajan, Tushar and Murdock, Calvin and Ananthabhotla, Ishwarya and Qian, Yijun and Ithapu, Vamsi Krishna and Manocha, Dinesh and Gao, Ruohan},
  title     = {{EgoAdapt}: Adaptive Multisensory Distillation and Policy Learning for Efficient Egocentric Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10741--10752},
}
RobAVA: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding: Baoli Sun,

Ning Wang,

Xinzhu Ma,

Anqi Zou,

Yihang Lu,

Chuixuan Fan,

Zhihui Wang,

Kun Lu,

Zhiyong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Baoli and Wang, Ning and Ma, Xinzhu and Zou, Anqi and Lu, Yihang and Fan, Chuixuan and Wang, Zhihui and Lu, Kun and Wang, Zhiyong},
  title     = {{RobAVA}: A Large-scale Dataset and Baseline Towards Video based Robotic Arm Action Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13985--13994},
}
DisenQ: Disentangling Q-Former for Activity-Biometrics: Shehreen Azad,

Yogesh Singh Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Azad_2025_ICCV,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {{DisenQ}: Disentangling {Q-Former} for Activity-Biometrics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13502--13512},
}
Zero-Shot Depth Aware Image Editing with Diffusion Models: Rishubh Parihar,

Sachidanand VS,

R. Venkatesh Babu; [pdf] [supp]
[bibtex]
@InProceedings{Parihar_2025_ICCV,
  author    = {Parihar, Rishubh and VS, Sachidanand and Babu, R. Venkatesh},
  title     = {Zero-Shot Depth Aware Image Editing with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15748--15759},
}
Scaling Action Detection: AdaTAD++ with Transformer-Enhanced Temporal-Spatial Adaptation: Tanay Agrawal,

Abid Ali,

Antitza Dantcheva,

Francois Bremond; [pdf]
[bibtex]
@InProceedings{Agrawal_2025_ICCV,
  author    = {Agrawal, Tanay and Ali, Abid and Dantcheva, Antitza and Bremond, Francois},
  title     = {Scaling Action Detection: {AdaTAD++} with Transformer-Enhanced Temporal-Spatial Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12222--12231},
}
MotionCtrl: A Real-time Controllable Vision-Language-Motion Model: Bin Cao,

Sipeng Zheng,

Ye Wang,

Lujie Xia,

Qianshan Wei,

Qin Jin,

Jing Liu,

Zongqing Lu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Bin and Zheng, Sipeng and Wang, Ye and Xia, Lujie and Wei, Qianshan and Jin, Qin and Liu, Jing and Lu, Zongqing},
  title     = {{MotionCtrl}: A Real-time Controllable Vision-Language-Motion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12253--12262},
}
Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model: Zewei Xin,

Qinya Li,

Chaoyue Niu,

Fan Wu,

Guihai Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2025_ICCV,
  author    = {Xin, Zewei and Li, Qinya and Niu, Chaoyue and Wu, Fan and Chen, Guihai},
  title     = {Adaptive Routing of Text-to-Image Generation Requests Between Large Cloud Model and Light-Weight Edge Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19482--19491},
}
FlowStyler: Artistic Video Stylization via Transformation Fields Transports: Yuning Gong,

Jiaming Chen,

Xiaohua Ren,

Yuanjun Liao,

Yanci Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Yuning and Chen, Jiaming and Ren, Xiaohua and Liao, Yuanjun and Zhang, Yanci},
  title     = {{FlowStyler}: Artistic Video Stylization via Transformation Fields Transports},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10229--10238},
}
Rethink Sparse Signals for Pose-guided Text-to-image Generation: Wenjie Xuan,

Jing Zhang,

Juhua Liu,

Bo Du,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xuan_2025_ICCV,
  author    = {Xuan, Wenjie and Zhang, Jing and Liu, Juhua and Du, Bo and Tao, Dacheng},
  title     = {Rethink Sparse Signals for Pose-guided Text-to-image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15896--15906},
}
PatchScaler: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution: Yong Liu,

Hang Dong,

Jinshan Pan,

Qingji Dong,

Kai Chen,

Rongxiang Zhang,

Lean Fu,

Fei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yong and Dong, Hang and Pan, Jinshan and Dong, Qingji and Chen, Kai and Zhang, Rongxiang and Fu, Lean and Wang, Fei},
  title     = {{PatchScaler}: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11283--11293},
}
RefEdit: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions: Bimsara Pathiraja,

Maitreya Patel,

Shivam Singh,

Yezhou Yang,

Chitta Baral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathiraja_2025_ICCV,
  author    = {Pathiraja, Bimsara and Patel, Maitreya and Singh, Shivam and Yang, Yezhou and Baral, Chitta},
  title     = {{RefEdit}: A Benchmark and Method for Improving Instruction-based Image Editing Model on Referring Expressions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15646--15656},
}
A Unified Framework for Motion Reasoning and Generation in Human Interaction: Jeongeun Park,

Sungjoon Choi,

Sangdoo Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Jeongeun and Choi, Sungjoon and Yun, Sangdoo},
  title     = {A Unified Framework for Motion Reasoning and Generation in Human Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10698--10707},
}
Dynamic Group Detection using VLM-augmented Temporal Groupness Graph: Kaname Yokoyama,

Chihiro Nakatani,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yokoyama_2025_ICCV,
  author    = {Yokoyama, Kaname and Nakatani, Chihiro and Ukita, Norimichi},
  title     = {Dynamic Group Detection using {VLM}-augmented Temporal Groupness Graph},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10475--10484},
}
RoboPearls: Editable Video Simulation for Robot Manipulation: Tang Tao,

Likui Zhang,

Youpeng Wen,

Kaidong Zhang,

Jia-Wang Bian,

Xia Zhou,

Tianyi Yan,

Kun Zhan,

Peng Jia,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Tang and Zhang, Likui and Wen, Youpeng and Zhang, Kaidong and Bian, Jia-Wang and Zhou, Xia and Yan, Tianyi and Zhan, Kun and Jia, Peng and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{RoboPearls}: Editable Video Simulation for Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10118--10129},
}
FreeMorph: Tuning-Free Generalized Image Morphing with Diffusion Model: Yukang Cao,

Chenyang Si,

Jinghao Wang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Yukang and Si, Chenyang and Wang, Jinghao and Liu, Ziwei},
  title     = {{FreeMorph}: Tuning-Free Generalized Image Morphing with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18111--18120},
}
GazeGaussian: High-Fidelity Gaze Redirection with 3D Gaussian Splatting: Xiaobao Wei,

Peng Chen,

Guangyu Li,

Ming Lu,

Hui Chen,

Feng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Chen, Peng and Li, Guangyu and Lu, Ming and Chen, Hui and Tian, Feng},
  title     = {{GazeGaussian}: High-Fidelity Gaze Redirection with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13293--13303},
}
Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion: Xingyu Hu,

Junjun Jiang,

Chenyang Wang,

Kui Jiang,

Xianming Liu,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xingyu and Jiang, Junjun and Wang, Chenyang and Jiang, Kui and Liu, Xianming and Ma, Jiayi},
  title     = {Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11262--11272},
}
Stroke2Sketch: Harnessing Stroke Attributes for Training-Free Sketch Generation: Rui Yang,

Huining Li,

Yiyi Long,

Xiaojun Wu,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Rui and Li, Huining and Long, Yiyi and Wu, Xiaojun and He, Shengfeng},
  title     = {{Stroke2Sketch}: Harnessing Stroke Attributes for Training-Free Sketch Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16545--16554},
}
Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation: Gang Dai,

Yifan Zhang,

Yutao Qin,

Qiangya Guo,

Shuangping Huang,

Shuicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Gang and Zhang, Yifan and Qin, Yutao and Guo, Qiangya and Huang, Shuangping and Yan, Shuicheng},
  title     = {Beyond Isolated Words: Diffusion Brush for Handwritten Text-Line Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19054--19064},
}
Wasserstein Style Distribution Analysis and Transform for Stylized Image Generation: Xi Yu,

Xiang Gu,

Zhihao Shi,

Jian Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Xi and Gu, Xiang and Shi, Zhihao and Sun, Jian},
  title     = {{Wasserstein} Style Distribution Analysis and Transform for Stylized Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17496--17505},
}
SuMa: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models: Kien Nguyen,

Anh Tran,

Cuong Pham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Kien and Tran, Anh and Pham, Cuong},
  title     = {{SuMa}: A Subspace Mapping Approach for Robust and Effective Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19587--19596},
}
Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition: Haochen Chang,

Pengfei Ren,

Haoyang Zhang,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Haochen and Ren, Pengfei and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {Hierarchical-aware Orthogonal Disentanglement Framework for Fine-grained Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11252--11261},
}
Depth AnyEvent: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation: Luca Bartolomei,

Enrico Mannocci,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia; [pdf] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2025_ICCV,
  author    = {Bartolomei, Luca and Mannocci, Enrico and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {{Depth AnyEvent}: A Cross-Modal Distillation Paradigm for Event-Based Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19669--19678},
}
MH-LVC: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding: Huu-Tai Phung,

Zong-Lin Gao,

Yi-Chen Yao,

Kuan-Wei Ho,

Yi-Hsin Chen,

Yu-Hsiang Lin,

Alessandro Gnutti,

Wen-Hsiao Peng; [pdf] [supp]
[bibtex]
@InProceedings{Phung_2025_ICCV,
  author    = {Phung, Huu-Tai and Gao, Zong-Lin and Yao, Yi-Chen and Ho, Kuan-Wei and Chen, Yi-Hsin and Lin, Yu-Hsiang and Gnutti, Alessandro and Peng, Wen-Hsiao},
  title     = {{MH-LVC}: Multi-Hypothesis Temporal Prediction for Learned Conditional Residual Video Coding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19649--19658},
}
Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution: Vlad Hosu,

Lorenzo Agnolucci,

Daisuke Iso,

Dietmar Saupe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hosu_2025_ICCV,
  author    = {Hosu, Vlad and Agnolucci, Lorenzo and Iso, Daisuke and Saupe, Dietmar},
  title     = {Image Intrinsic Scale Assessment: Bridging the Gap Between Quality and Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12863--12872},
}
Temporal Rate Reduction Clustering for Human Motion Segmentation: Xianghan Meng,

Zhengyu Tong,

Zhiyuan Huang,

Chun-Guang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_ICCV,
  author    = {Meng, Xianghan and Tong, Zhengyu and Huang, Zhiyuan and Li, Chun-Guang},
  title     = {Temporal Rate Reduction Clustering for Human Motion Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14644--14654},
}
Embodied Representation Alignment with Mirror Neurons: Wentao Zhu,

Zhining Zhang,

Yuwei Ren,

Yin Huang,

Hao Xu,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Wentao and Zhang, Zhining and Ren, Yuwei and Huang, Yin and Xu, Hao and Wang, Yizhou},
  title     = {Embodied Representation Alignment with Mirror Neurons},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11948--11957},
}
LEGION: Learning to Ground and Explain for Synthetic Image Detection: Hengrui Kang,

Siwei Wen,

Zichen Wen,

Junyan Ye,

Weijia Li,

Peilin Feng,

Baichuan Zhou,

Bin Wang,

Dahua Lin,

Linfeng Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Hengrui and Wen, Siwei and Wen, Zichen and Ye, Junyan and Li, Weijia and Feng, Peilin and Zhou, Baichuan and Wang, Bin and Lin, Dahua and Zhang, Linfeng and He, Conghui},
  title     = {{LEGION}: Learning to Ground and Explain for Synthetic Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18937--18947},
}
FaceLift: Learning Generalizable Single Image 3D Face Reconstruction from Synthetic Heads: Weijie Lyu,

Yi Zhou,

Ming-Hsuan Yang,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Weijie and Zhou, Yi and Yang, Ming-Hsuan and Shu, Zhixin},
  title     = {{FaceLift}: Learning Generalizable Single Image {3D} Face Reconstruction from Synthetic Heads},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12691--12701},
}
Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression: Haowei Kuang,

Wenhan Yang,

Zongming Guo,

Jiaying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Kuang_2025_ICCV, author = {Kuang, Haowei and Yang, Wenhan and Guo, Zongming and Liu, Jiaying}, title = {Cross-Granularity Online Optimization with Masked Compensated Information for Learned Image Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16514--16523} }
VideoVAE+: Large Motion Video Autoencoding with Cross-modal Video VAE: Yazhou Xing,

Yang Fei,

Yingqing He,

Jingye Chen,

Jiaxin Xie,

Xiaowei Chi,

Qifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_ICCV, author = {Xing, Yazhou and Fei, Yang and He, Yingqing and Chen, Jingye and Xie, Jiaxin and Chi, Xiaowei and Chen, Qifeng}, title = {{VideoVAE+}: Large Motion Video Autoencoding with Cross-modal Video {VAE}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17951--17960} }
MotionDiff: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion: Yikun Ma,

Yiqing Li,

Jiawei Wu,

Xing Luo,

Zhi Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV, author = {Ma, Yikun and Li, Yiqing and Wu, Jiawei and Luo, Xing and Jin, Zhi}, title = {{MotionDiff}: Training-free Zero-shot Interactive Motion Editing via Flow-assisted Multi-view Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14475--14485} }
PASTA: Part-Aware Sketch-to-3D Shape Generation with Text-Aligned Prior: Seunggwan Lee,

Hwanhee Jung,

Byoungsoo Koh,

Qixing Huang,

Sang Ho Yoon,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV, author = {Lee, Seunggwan and Jung, Hwanhee and Koh, Byoungsoo and Huang, Qixing and Yoon, Sang Ho and Kim, Sangpil}, title = {{PASTA}: Part-Aware Sketch-to-{3D} Shape Generation with Text-Aligned Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18585--18595} }
VMBench: A Benchmark for Perception-Aligned Video Motion Generation: Xinran Ling,

Chen Zhu,

Meiqi Wu,

Hangyu Li,

Xiaokun Feng,

Cundian Yang,

Aiming Hao,

Jiashu Zhu,

Jiahong Wu,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_ICCV, author = {Ling, Xinran and Zhu, Chen and Wu, Meiqi and Li, Hangyu and Feng, Xiaokun and Yang, Cundian and Hao, Aiming and Zhu, Jiashu and Wu, Jiahong and Chu, Xiangxiang}, title = {{VMBench}: A Benchmark for Perception-Aligned Video Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13087--13098} }
EvolvingGrasp: Evolutionary Grasp Generation via Efficient Preference Alignment: Yufei Zhu,

Yiming Zhong,

Zemin Yang,

Peishan Cong,

Jingyi Yu,

Xinge Zhu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yufei and Zhong, Yiming and Yang, Zemin and Cong, Peishan and Yu, Jingyi and Zhu, Xinge and Ma, Yuexin}, title = {{EvolvingGrasp}: Evolutionary Grasp Generation via Efficient Preference Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11665--11674} }
HyTIP: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding: Yi-Hsin Chen,

Yi-Chen Yao,

Kuan-Wei Ho,

Chun-Hung Wu,

Huu-Tai Phung,

Martin Benjak,

Jörn Ostermann,

Wen-Hsiao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Yi-Hsin and Yao, Yi-Chen and Ho, Kuan-Wei and Wu, Chun-Hung and Phung, Huu-Tai and Benjak, Martin and Ostermann, J{\"o}rn and Peng, Wen-Hsiao}, title = {{HyTIP}: Hybrid Temporal Information Propagation for Masked Conditional Residual Video Coding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17889--17898} }
PromptDresser: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask: Jeongho Kim,

Hoiyeong Jin,

Sunghyun Park,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Jeongho and Jin, Hoiyeong and Park, Sunghyun and Choo, Jaegul}, title = {{PromptDresser}: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16026--16036} }
Grouped Speculative Decoding for Autoregressive Image Generation: Junhyuk So,

Juncheol Shin,

Hyunho Kook,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{So_2025_ICCV, author = {So, Junhyuk and Shin, Juncheol and Kook, Hyunho and Park, Eunhyeok}, title = {Grouped Speculative Decoding for Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15375--15384} }
Music-Aligned Holistic 3D Dance Generation via Hierarchical Motion Modeling: Xiaojie Li,

Ronghui Li,

Shukai Fang,

Shuzhao Xie,

Xiaoyang Guo,

Jiaqing Zhou,

Junkun Peng,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Xiaojie and Li, Ronghui and Fang, Shukai and Xie, Shuzhao and Guo, Xiaoyang and Zhou, Jiaqing and Peng, Junkun and Wang, Zhi}, title = {Music-Aligned Holistic {3D} Dance Generation via Hierarchical Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14420--14430} }
MatchDiffusion: Training-free Generation of Match-Cuts: Alejandro Pardo,

Fabio Pizzati,

Tong Zhang,

Alexander Pondaven,

Philip Torr,

Juan Camilo Perez,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pardo_2025_ICCV, author = {Pardo, Alejandro and Pizzati, Fabio and Zhang, Tong and Pondaven, Alexander and Torr, Philip and Perez, Juan Camilo and Ghanem, Bernard}, title = {{MatchDiffusion}: Training-free Generation of Match-Cuts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14973--14982} }
Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction: Yanwen Fang,

Wenqi Jia,

Xu Cao,

Peng-Tao Jiang,

Guodong Li,

Jintai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV, author = {Fang, Yanwen and Jia, Wenqi and Cao, Xu and Jiang, Peng-Tao and Li, Guodong and Chen, Jintai}, title = {Proxy-Bridged Game Transformer for Interactive Extreme Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13912--13921} }
ReFlex: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation: Jimyeong Kim,

Jungwon Park,

Yeji Song,

Nojun Kwak,

Wonjong Rhee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Jimyeong and Park, Jungwon and Song, Yeji and Kwak, Nojun and Rhee, Wonjong}, title = {{ReFlex}: Text-Guided Editing of Real Images in Rectified Flow via Mid-Step Feature Extraction and Attention Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15939--15948} }
Co-Painter: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection: Bowen Fu,

Wei Wei,

Jiaqi Tang,

Jiangtao Nie,

Yanyu Ye,

Xiaogang Xu,

Ying-Cong Chen,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_ICCV, author = {Fu, Bowen and Wei, Wei and Tang, Jiaqi and Nie, Jiangtao and Ye, Yanyu and Xu, Xiaogang and Chen, Ying-Cong and Zhang, Lei}, title = {{Co-Painter}: Fine-Grained Controllable Image Stylization via Implicit Decoupling and Adaptive Injection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {16830--16839} }
Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image: Shuang Xu,

Zixiang Zhao,

Haowen Bai,

Chang Yu,

Jiangjun Peng,

Xiangyong Cao,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV, author = {Xu, Shuang and Zhao, Zixiang and Bai, Haowen and Yu, Chang and Peng, Jiangjun and Cao, Xiangyong and Meng, Deyu}, title = {Hipandas: Hyperspectral Image Joint Denoising and Super-Resolution by Image Fusion with the Panchromatic Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12002--12011} }
RoboTron-Mani: All-in-One Multimodal Large Model for Robotic Manipulation: Feng Yan,

Fanfan Liu,

Yiyang Huang,

Zechao Guan,

Liming Zheng,

Yufeng Zhong,

Chengjian Feng,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_ICCV, author = {Yan, Feng and Liu, Fanfan and Huang, Yiyang and Guan, Zechao and Zheng, Liming and Zhong, Yufeng and Feng, Chengjian and Ma, Lin}, title = {{RoboTron-Mani}: All-in-One Multimodal Large Model for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13707--13718} }
Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation: Shengfang Zhai,

Jiajun Li,

Yue Liu,

Huanran Chen,

Zhihua Tian,

Wenjie Qu,

Qingni Shen,

Ruoxi Jia,

Yinpeng Dong,

Jiaheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_ICCV, author = {Zhai, Shengfang and Li, Jiajun and Liu, Yue and Chen, Huanran and Tian, Zhihua and Qu, Wenjie and Shen, Qingni and Jia, Ruoxi and Dong, Yinpeng and Zhang, Jiaheng}, title = {Efficient Input-level Backdoor Defense on Text-to-Image Synthesis via Neuron Activation Variation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15182--15193} }
MeshLLM: Empowering Large Language Models to Progressively Understand and Generate 3D Mesh: Shuangkang Fang,

I-Chao Shen,

Yufeng Wang,

Yi-Hsuan Tsai,

Yi Yang,

Shuchang Zhou,

Wenrui Ding,

Takeo Igarashi,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV, author = {Fang, Shuangkang and Shen, I-Chao and Wang, Yufeng and Tsai, Yi-Hsuan and Yang, Yi and Zhou, Shuchang and Ding, Wenrui and Igarashi, Takeo and Yang, Ming-Hsuan}, title = {{MeshLLM}: Empowering Large Language Models to Progressively Understand and Generate {3D} Mesh}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14061--14072} }
Split-and-Combine: Enhancing Style Augmentation for Single Domain Generalization: Zhen Zhang,

Shuai Yang,

Qianlong Dang,

Zhize Wu,

Lichuan Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zhen and Yang, Shuai and Dang, Qianlong and Wu, Zhize and Gu, Lichuan}, title = {Split-and-Combine: Enhancing Style Augmentation for Single Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15616--15625} }
Progressive Artwork Outpainting via Latent Diffusion Models: Dae-Young Song,

Jung-Jae Yu,

Donghyeon Cho; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV, author = {Song, Dae-Young and Yu, Jung-Jae and Cho, Donghyeon}, title = {Progressive Artwork Outpainting via Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15405--15415} }
SCORE: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation: Shiqi Huang,

Shuting He,

Huaiyuan Qin,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV, author = {Huang, Shiqi and He, Shuting and Qin, Huaiyuan and Wen, Bihan}, title = {{SCORE}: Scene Context Matters in Open-Vocabulary Remote Sensing Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12559--12569} }
LEGO-Maker: A Semantic-Driven Algorithm for Text-to-3D Generation: Yifei Zhang,

Lei Chen; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yifei and Chen, Lei}, title = {{LEGO-Maker}: A Semantic-Driven Algorithm for Text-to-{3D} Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15127--15136} }
DiffDoctor: Diagnosing Image Diffusion Models Before Treating: Yiyang Wang,

Xi Chen,

Xiaogang Xu,

Sihui Ji,

Yu Liu,

Yujun Shen,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Ji, Sihui and Liu, Yu and Shen, Yujun and Zhao, Hengshuang}, title = {{DiffDoctor}: Diagnosing Image Diffusion Models Before Treating}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {18917--18926} }
SemTalk: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis: Xiangyue Zhang,

Jianfang Li,

Jiaxu Zhang,

Ziqiang Dang,

Jianqiang Ren,

Liefeng Bo,

Zhigang Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiangyue and Li, Jianfang and Zhang, Jiaxu and Dang, Ziqiang and Ren, Jianqiang and Bo, Liefeng and Tu, Zhigang}, title = {{SemTalk}: Holistic Co-speech Motion Generation with Frame-level Semantic Emphasis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13761--13771} }
Textured 3D Regenerative Morphing with 3D Diffusion Prior: Songlin Yang,

Yushi Lan,

Honghua Chen,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV, author = {Yang, Songlin and Lan, Yushi and Chen, Honghua and Pan, Xingang}, title = {Textured {3D} Regenerative Morphing with {3D} Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15159--15170} }
DreamActor-M1: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance: Yuxuan Luo,

Zhengkun Rong,

Lizhen Wang,

Longhao Zhang,

Tianshu Hu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV, author = {Luo, Yuxuan and Rong, Zhengkun and Wang, Lizhen and Zhang, Longhao and Hu, Tianshu}, title = {{DreamActor-M1}: Holistic, Expressive and Robust Human Image Animation with Hybrid Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11036--11046} }
Blind2Sound: Self-Supervised Image Denoising without Residual Noise: Jiazheng Liu,

Zejin Wang,

Bohao Chen,

Hua Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Jiazheng and Wang, Zejin and Chen, Bohao and Han, Hua}, title = {{Blind2Sound}: Self-Supervised Image Denoising without Residual Noise}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12937--12946} }
PVMamba: Parallelizing Vision Mamba via Dynamic State Aggregation: Fei Xie,

Zhongdao Wang,

Weijia Zhang,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV, author = {Xie, Fei and Wang, Zhongdao and Zhang, Weijia and Ma, Chao}, title = {{PVMamba}: Parallelizing Vision {Mamba} via Dynamic State Aggregation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10218--10228} }
SynFER: Towards Boosting Facial Expression Recognition with Synthetic Data: Xilin He,

Cheng Luo,

Xiaole Xian,

Bing Li,

Muhammad Haris Khan,

Zongyuan Ge,

Weicheng Xie,

Siyang Song,

Linlin Shen,

Bernard Ghanem,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV, author = {He, Xilin and Luo, Cheng and Xian, Xiaole and Li, Bing and Khan, Muhammad Haris and Ge, Zongyuan and Xie, Weicheng and Song, Siyang and Shen, Linlin and Ghanem, Bernard and Yue, Xiangyu}, title = {{SynFER}: Towards Boosting Facial Expression Recognition with Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10184--10195} }
Generating Physically Stable and Buildable Brick Structures from Text: Ava Pun,

Kangle Deng,

Ruixuan Liu,

Deva Ramanan,

Changliu Liu,

Jun-Yan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pun_2025_ICCV, author = {Pun, Ava and Deng, Kangle and Liu, Ruixuan and Ramanan, Deva and Liu, Changliu and Zhu, Jun-Yan}, title = {Generating Physically Stable and Buildable Brick Structures from Text}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14798--14809} }
Engage for All: Making Ordinary Image Descriptions Appealing Again!: Yuyan Chen,

Yifan Jiang,

Li Zhou,

Jinghan Cao,

Yu Guan,

Ming Yang,

Qingpei Guo; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Yuyan and Jiang, Yifan and Zhou, Li and Cao, Jinghan and Guan, Yu and Yang, Ming and Guo, Qingpei}, title = {Engage for All: Making Ordinary Image Descriptions Appealing Again!}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19342--19352} }
QuantCache: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation: Junyi Wu,

Zhiteng Li,

Zheng Hui,

Yulun Zhang,

Linghe Kong,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Junyi and Li, Zhiteng and Hui, Zheng and Zhang, Yulun and Kong, Linghe and Yang, Xiaokang}, title = {{QuantCache}: Adaptive Importance-Guided Quantization with Hierarchical Latent and Layer Caching for Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15035--15044} }
DH-FaceVid-1K: A Large-Scale High-Quality Dataset for Face Video Generation: Donglin Di,

He Feng,

Wenzhang Sun,

Yongjia Ma,

Hao Li,

Wei Chen,

Lei Fan,

Tonghua Su,

Xun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Di_2025_ICCV, author = {Di, Donglin and Feng, He and Sun, Wenzhang and Ma, Yongjia and Li, Hao and Chen, Wei and Fan, Lei and Su, Tonghua and Yang, Xun}, title = {{DH-FaceVid-1K}: A Large-Scale High-Quality Dataset for Face Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12124--12134} }
Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding: Xiaojie Zhang,

Yuanfei Wang,

Ruihai Wu,

Kunqi Xu,

Yu Li,

Liuyu Xiang,

Hao Dong,

Zhaofeng He; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaojie and Wang, Yuanfei and Wu, Ruihai and Xu, Kunqi and Li, Yu and Xiang, Liuyu and Dong, Hao and He, Zhaofeng}, title = {Adaptive Articulated Object Manipulation On The Fly with Foundation Model Reasoning and Part Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13032--13042} }
LHM: Large Animatable Human Reconstruction Model for Single Image to 3D in Seconds: Lingteng Qiu,

Xiaodong Gu,

Peihao Li,

Qi Zuo,

Weichao Shen,

Junfei Zhang,

Kejie Qiu,

Weihao Yuan,

Guanying Chen,

Zilong Dong,

Liefeng Bo; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV, author = {Qiu, Lingteng and Gu, Xiaodong and Li, Peihao and Zuo, Qi and Shen, Weichao and Zhang, Junfei and Qiu, Kejie and Yuan, Weihao and Chen, Guanying and Dong, Zilong and Bo, Liefeng}, title = {{LHM}: Large Animatable Human Reconstruction Model for Single Image to {3D} in Seconds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14184--14194} }
An Empirical Study of Autoregressive Pre-training from Videos: Jathushan Rajasegaran,

Ilija Radosavovic,

Rahul Ravishankar,

Yossi Gandelsman,

Christoph Feichtenhofer,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rajasegaran_2025_ICCV, author = {Rajasegaran, Jathushan and Radosavovic, Ilija and Ravishankar, Rahul and Gandelsman, Yossi and Feichtenhofer, Christoph and Malik, Jitendra}, title = {An Empirical Study of Autoregressive Pre-training from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19108--19118} }
Latent Diffusion Models with Masked AutoEncoders: Junho Lee,

Jeongwoo Shin,

Hyungwook Choi,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV, author = {Lee, Junho and Shin, Jeongwoo and Choi, Hyungwook and Lee, Joonseok}, title = {Latent Diffusion Models with Masked {AutoEncoders}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17422--17431} }
Open-World Skill Discovery from Unsegmented Demonstration Videos: Jingwen Deng,

Zihao Wang,

Shaofei Cai,

Anji Liu,

Yitao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV, author = {Deng, Jingwen and Wang, Zihao and Cai, Shaofei and Liu, Anji and Liang, Yitao}, title = {Open-World Skill Discovery from Unsegmented Demonstration Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {10708--10718} }
End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation: Liwei Wang,

Yanduo Zhang,

Tao Lu,

Fang Liu,

Huiqin Zhang,

Jiayi Ma,

Huabing Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Liwei and Zhang, Yanduo and Lu, Tao and Liu, Fang and Zhang, Huiqin and Ma, Jiayi and Zhou, Huabing}, title = {End-to-End Entity-Predicate Association Reasoning for Dynamic Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17729--17738} }
Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition: Jeonghyeok Do,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2025_ICCV, author = {Do, Jeonghyeok and Kim, Munchurl}, title = {Bridging the Skeleton-Text Modality Gap: Diffusion-Powered Modality Alignment for Zero-shot Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12757--12768} }
MixANT: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation: Syed Talal Wasim,

Hamid Suleman,

Olga Zatsarynna,

Muzammal Naseer,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasim_2025_ICCV, author = {Wasim, Syed Talal and Suleman, Hamid and Zatsarynna, Olga and Naseer, Muzammal and Gall, Juergen}, title = {{MixANT}: Observation-dependent Memory Propagation for Stochastic Dense Action Anticipation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14613--14622} }
Blind Noisy Image Deblurring Using Residual Guidance Strategy: Heyan Liu,

Jianing Sun,

Jun Liu,

Xi-Le Zhao,

Tingting Wu,

Tieyong Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Heyan and Sun, Jianing and Liu, Jun and Zhao, Xi-Le and Wu, Tingting and Zeng, Tieyong}, title = {Blind Noisy Image Deblurring Using Residual Guidance Strategy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11016--11025} }
Unlocking the Potential of Diffusion Priors in Blind Face Restoration: Yunqi Miao,

Zhiyu Qu,

Mingqi Gao,

Changrui Chen,

Jifei Song,

Jungong Han,

Jiankang Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV, author = {Miao, Yunqi and Qu, Zhiyu and Gao, Mingqi and Chen, Changrui and Song, Jifei and Han, Jungong and Deng, Jiankang}, title = {Unlocking the Potential of Diffusion Priors in Blind Face Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {13471--13480} }
ProbRes: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition: Sanjoy Kundu,

Shanmukha Vellamcheti,

Sathyanarayanan N. Aakur; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2025_ICCV, author = {Kundu, Sanjoy and Vellamcheti, Shanmukha and Aakur, Sathyanarayanan N.}, title = {{ProbRes}: Probabilistic Jump Diffusion for Open-World Egocentric Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14128--14140} }
CRAM: Large Scale Video Continual Learning with Bootstrapped Compression: Shivani Mall,

Joao F. Henriques; [pdf] [arXiv]
[bibtex]
@InProceedings{Mall_2025_ICCV, author = {Mall, Shivani and Henriques, Joao F.}, title = {{CRAM}: Large Scale Video Continual Learning with Bootstrapped Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15045--15055} }
DeSPITE: Exploring Contrastive Deep Skeleton-Pointcloud-IMU-Text Embeddings for Advanced Point Cloud Human Activity Understanding: Thomas Kreutz,

Max Mühlhäuser,

Alejandro Sanchez Guinea; [pdf] [supp]
[bibtex]
@InProceedings{Kreutz_2025_ICCV, author = {Kreutz, Thomas and M{\"u}hlh{\"a}user, Max and Guinea, Alejandro Sanchez}, title = {{DeSPITE}: Exploring Contrastive Deep Skeleton-Pointcloud-{IMU}-Text Embeddings for Advanced Point Cloud Human Activity Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14633--14643} }
Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation: Xueqing Deng,

Linjie Yang,

Qihang Yu,

Chenglin Yang,

Liang-Chieh Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV, author = {Deng, Xueqing and Yang, Linjie and Yu, Qihang and Yang, Chenglin and Chen, Liang-Chieh}, title = {Leveraging Panoptic Scene Graph for Evaluating Fine-Grained Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15107--15116} }
SpinMeRound: Consistent Multi-View Identity Generation Using Diffusion Models: Stathis Galanakis,

Alexandros Lattas,

Stylianos Moschoglou,

Bernhard Kainz,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galanakis_2025_ICCV, author = {Galanakis, Stathis and Lattas, Alexandros and Moschoglou, Stylianos and Kainz, Bernhard and Zafeiriou, Stefanos}, title = {{SpinMeRound}: Consistent Multi-View Identity Generation Using Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {14346--14356} }
D3: Training-Free AI-Generated Video Detection Using Second-Order Features: Chende Zheng,

Ruiqi Suo,

Chenhao Lin,

Zhengyu Zhao,

Le Yang,

Shuai Liu,

Minghui Yang,

Cong Wang,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chende and Suo, Ruiqi and Lin, Chenhao and Zhao, Zhengyu and Yang, Le and Liu, Shuai and Yang, Minghui and Wang, Cong and Shen, Chao}, title = {{D3}: Training-Free {AI}-Generated Video Detection Using Second-Order Features}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {12852--12862} }
Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics: Keming Wu,

Junwen Chen,

Zhanhao Liang,

Yinuo Wang,

Ji Li,

Chao Zhang,

Bin Wang,

Yuhui Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Keming and Chen, Junwen and Liang, Zhanhao and Wang, Yinuo and Li, Ji and Zhang, Chao and Wang, Bin and Yuan, Yuhui}, title = {Hybrid Layout Control for Diffusion Transformer: Fewer Annotations, Superior Aesthetics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17930--17940} }
EC-Flow: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow: Yixiang Chen,

Peiyan Li,

Yan Huang,

Jiabing Yang,

Kehan Chen,

Liang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Yixiang and Li, Peiyan and Huang, Yan and Yang, Jiabing and Chen, Kehan and Wang, Liang}, title = {{EC-Flow}: Enabling Versatile Robotic Manipulation from Action-Unlabeled Videos via Embodiment-Centric Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11958--11968} }
Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis: Xinyu Hou,

Zongsheng Yue,

Xiaoming Li,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV, author = {Hou, Xinyu and Yue, Zongsheng and Li, Xiaoming and Loy, Chen Change}, title = {Omegance: A Single Parameter for Various Granularities in Diffusion-Based Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19353--19362} }
Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition: Wenhan Wu,

Zhishuai Guo,

Chen Chen,

Hongfei Xue,

Aidong Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Wenhan and Guo, Zhishuai and Chen, Chen and Xue, Hongfei and Lu, Aidong}, title = {Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {11122--11131} }
TryOn-Refiner: Conditional Rectified-flow-based TryOn Refiner for More Accurate Detail Reconstruction: Wen Qian; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV, author = {Qian, Wen}, title = {{TryOn-Refiner}: Conditional Rectified-flow-based {TryOn} Refiner for More Accurate Detail Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {15669--15679} }
AM-Adapter: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild: Siyoon Jin,

Jisu Nam,

Jiyoung Kim,

Dahyun Chung,

Yeong-Seok Kim,

Joonhyung Park,

Heonjeong Chu,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV, author = {Jin, Siyoon and Nam, Jisu and Kim, Jiyoung and Chung, Dahyun and Kim, Yeong-Seok and Park, Joonhyung and Chu, Heonjeong and Kim, Seungryong}, title = {{AM-Adapter}: Appearance Matching Adapter for Exemplar-based Semantic Image Synthesis in-the-Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {17077--17086} }
Trans-Adapter: A Plug-and-Play Framework for Transparent Image Inpainting: Yuekun Dai,

Haitian Li,

Shangchen Zhou,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Yuekun and Li, Haitian and Zhou, Shangchen and Loy, Chen Change},
  title     = {{Trans-Adapter}: A Plug-and-Play Framework for Transparent Image Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15015--15024}
}
MetaMorph: Multimodal Understanding and Generation via Instruction Tuning: Shengbang Tong,

David Fan,

Jiachen Li,

Yunyang Xiong,

Xinlei Chen,

Koustuv Sinha,

Michael Rabbat,

Yann LeCun,

Saining Xie,

Zhuang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Shengbang and Fan, David and Li, Jiachen and Xiong, Yunyang and Chen, Xinlei and Sinha, Koustuv and Rabbat, Michael and LeCun, Yann and Xie, Saining and Liu, Zhuang},
  title     = {{MetaMorph}: Multimodal Understanding and Generation via Instruction Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17001--17012}
}
Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization: Gen Li,

Yang Xiao,

Jie Ji,

Kaiyuan Deng,

Bo Hui,

Linke Guo,

Xiaolong Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Gen and Xiao, Yang and Ji, Jie and Deng, Kaiyuan and Hui, Bo and Guo, Linke and Ma, Xiaolong},
  title     = {Sculpting Memory: Multi-Concept Forgetting in Diffusion Models via Dynamic Mask and Concept-Aware Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19659--19668}
}
LLM Thought Divergence and Convergence for Dialogue-Based Image Generation Control: Hui Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hui},
  title     = {{LLM} Thought Divergence and Convergence for Dialogue-Based Image Generation Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18101--18110}
}
LATINO-PRO: LAtent consisTency INverse sOlver with PRompt Optimization: Alessio Spagnoletti,

Jean Prost,

Andrés Almansa,

Nicolas Papadakis,

Marcelo Pereyra; [pdf] [supp]
[bibtex]
@InProceedings{Spagnoletti_2025_ICCV,
  author    = {Spagnoletti, Alessio and Prost, Jean and Almansa, Andr{\'e}s and Papadakis, Nicolas and Pereyra, Marcelo},
  title     = {{LATINO-PRO}: {LAtent} {consisTency} {INverse} {sOlver} with {PRompt} Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19597--19607}
}
Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables: Wontae Kim,

Keuntek Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Wontae and Lee, Keuntek and Cho, Nam Ik},
  title     = {Lightweight and Fast Real-time Image Enhancement via Decomposition of the Spatial-aware Lookup Tables},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11895--11905}
}
Compression-Aware One-Step Diffusion Model for JPEG Artifact Removal: Jinpei Guo,

Zheng Chen,

Wenbo Li,

Yong Guo,

Yulun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jinpei and Chen, Zheng and Li, Wenbo and Guo, Yong and Zhang, Yulun},
  title     = {Compression-Aware One-Step Diffusion Model for {JPEG} Artifact Removal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14930--14939}
}
AnyPortal: Zero-Shot Consistent Video Background Replacement: Wenshuo Gao,

Xicheng Lan,

Shuai Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Wenshuo and Lan, Xicheng and Yang, Shuai},
  title     = {{AnyPortal}: Zero-Shot Consistent Video Background Replacement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18990--18999}
}
Towards a Universal Image Degradation Model via Content-Degradation Disentanglement: Wenbo Yang,

Zhongling Wang,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Wenbo and Wang, Zhongling and Wang, Zhou},
  title     = {Towards a Universal Image Degradation Model via Content-Degradation Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12966--12975}
}
Balanced Image Stylization with Style Matching Score: Yuxin Jiang,

Liming Jiang,

Shuai Yang,

Jia-Wei Liu,

Ivor W. Tsang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yuxin and Jiang, Liming and Yang, Shuai and Liu, Jia-Wei and Tsang, Ivor W. and Shou, Mike Zheng},
  title     = {Balanced Image Stylization with Style Matching Score},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17346--17355}
}
MultiModal Action Conditioned Video Simulation: Yichen Li,

Antonio Torralba; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yichen and Torralba, Antonio},
  title     = {{MultiModal} Action Conditioned Video Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14173--14183}
}
Imbalance in Balance: Online Concept Balancing in Generation Models: Yukai Shi,

Jiarong Ou,

Rui Chen,

Haotian Yang,

Jiahao Wang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yukai and Ou, Jiarong and Chen, Rui and Yang, Haotian and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Gai, Kun},
  title     = {Imbalance in Balance: Online Concept Balancing in Generation Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17432--17442}
}
Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order Levenberg-Marquardt-Langevin: Fangyikang Wang,

Hubery Yin,

Lei Qian,

Yinan Li,

Shaobin Zhuang,

Huminhao Zhu,

Yilin Zhang,

Yanlong Tang,

Chao Zhang,

Hanbin Zhao,

Hui Qian,

Chen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Fangyikang and Yin, Hubery and Qian, Lei and Li, Yinan and Zhuang, Shaobin and Zhu, Huminhao and Zhang, Yilin and Tang, Yanlong and Zhang, Chao and Zhao, Hanbin and Qian, Hui and Li, Chen},
  title     = {Unleashing High-Quality Image Generation in Diffusion Sampling Using Second-Order {Levenberg-Marquardt-Langevin}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10453--10464}
}
SD2Actor: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation: Jiayi Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiayi},
  title     = {{SD2Actor}: Continuous State Decomposition via Diffusion Embeddings for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13751--13760}
}
Free-Form Motion Control: Controlling the 6D Poses of Camera and Objects in Video Generation: Xincheng Shuai,

Henghui Ding,

Zhenyuan Qin,

Hao Luo,

Xingjun Ma,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2025_ICCV,
  author    = {Shuai, Xincheng and Ding, Henghui and Qin, Zhenyuan and Luo, Hao and Ma, Xingjun and Tao, Dacheng},
  title     = {Free-Form Motion Control: Controlling the {6D} Poses of Camera and Objects in Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12449--12458}
}
Multi-Modal Few-Shot Temporal Action Segmentation: Zijia Lu,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Zijia and Elhamifar, Ehsan},
  title     = {Multi-Modal Few-Shot Temporal Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14106--14116}
}
What If: Understanding Motion Through Sparse Interactions: Stefan Andreas Baumann,

Nick Stracke,

Timy Phan,

Björn Ommer; [pdf] [supp]
[bibtex]
@InProceedings{Baumann_2025_ICCV,
  author    = {Baumann, Stefan Andreas and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn},
  title     = {What If: Understanding Motion Through Sparse Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10286--10296}
}
SAMPLE: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition: Jing Wang,

Rui Zhao,

Ruiqin Xiong,

Xingtao Wang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jing and Zhao, Rui and Xiong, Ruiqin and Wang, Xingtao and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{SAMPLE}: Semantic Alignment through Temporal-Adaptive Multimodal Prompt Learning for Event-Based Open-Vocabulary Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14409--14419}
}
Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with Fourier Integrity: Sung Ju Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Sung Ju and Cho, Nam Ik},
  title     = {Semantic Watermarking Reinvented: Enhancing Robustness and Generation Quality with {Fourier} Integrity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18759--18769}
}
CoMatch: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching: Zizhuo Li,

Yifan Lu,

Linfeng Tang,

Shihua Zhang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zizhuo and Lu, Yifan and Tang, Linfeng and Zhang, Shihua and Ma, Jiayi},
  title     = {{CoMatch}: Dynamic Covisibility-Aware Transformer for Bilateral Subpixel-Level Semi-Dense Image Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18521--18530}
}
Memory-Efficient Generative Models via Product Quantization: Jie Shao,

Hanxiao Zhang,

Hao Yu,

Jianxin Wu; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Jie and Zhang, Hanxiao and Yu, Hao and Wu, Jianxin},
  title     = {Memory-Efficient Generative Models via Product Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16871--16881}
}
Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings: Haoyu Yao,

Bin Yang,

Wenke Huang,

Bo Du,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Haoyu and Yang, Bin and Huang, Wenke and Du, Bo and Ye, Mang},
  title     = {Unsupervised Visible-Infrared Person Re-identification under Unpaired Settings},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11916--11926}
}
Multimodal LLMs as Customized Reward Models for Text-to-Image Generation: Shijie Zhou,

Ruiyi Zhang,

Huaisheng Zhu,

Branislav Kveton,

Yufan Zhou,

Jiuxiang Gu,

Jian Chen,

Changyou Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Shijie and Zhang, Ruiyi and Zhu, Huaisheng and Kveton, Branislav and Zhou, Yufan and Gu, Jiuxiang and Chen, Jian and Chen, Changyou},
  title     = {Multimodal {LLMs} as Customized Reward Models for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19638--19648}
}
Diving into the Fusion of Monocular Priors for Generalized Stereo Matching: Chengtang Yao,

Lidong Yu,

Zhidan Liu,

Jiaxi Zeng,

Yuwei Wu,

Yunde Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Chengtang and Yu, Lidong and Liu, Zhidan and Zeng, Jiaxi and Wu, Yuwei and Jia, Yunde},
  title     = {Diving into the Fusion of Monocular Priors for Generalized Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14887--14897}
}
RAGNet: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping: Dongming Wu,

Yanping Fu,

Saike Huang,

Yingfei Liu,

Fan Jia,

Nian Liu,

Feng Dai,

Tiancai Wang,

Rao Muhammad Anwer,

Fahad Shahbaz Khan,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Dongming and Fu, Yanping and Huang, Saike and Liu, Yingfei and Jia, Fan and Liu, Nian and Dai, Feng and Wang, Tiancai and Anwer, Rao Muhammad and Khan, Fahad Shahbaz and Shen, Jianbing},
  title     = {{RAGNet}: Large-scale Reasoning-based Affordance Segmentation Benchmark towards General Grasping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11980--11990}
}
A3GS: Arbitrary Artistic Style into Arbitrary 3D Gaussian Splatting: Zhiyuan Fang,

Rengan Xie,

Xuancheng Jin,

Qi Ye,

Wei Chen,

Wenting Zheng,

Rui Wang,

Yuchi Huo; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Zhiyuan and Xie, Rengan and Jin, Xuancheng and Ye, Qi and Chen, Wei and Zheng, Wenting and Wang, Rui and Huo, Yuchi},
  title     = {{A3GS}: Arbitrary Artistic Style into Arbitrary {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17751--17760}
}
SEREP: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting: Arthur Josi,

Luiz Gustavo Hafemann,

Abdallah Dib,

Emeline Got,

Rafael M. O. Cruz,

Marc-André Carbonneau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Josi_2025_ICCV,
  author    = {Josi, Arthur and Hafemann, Luiz Gustavo and Dib, Abdallah and Got, Emeline and Cruz, Rafael M. O. and Carbonneau, Marc-Andr{\'e}},
  title     = {{SEREP}: Semantic Facial Expression Representation for Robust In-the-Wild Capture and Retargeting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14538--14548}
}
The Silent Assistant: NoiseQuery as Implicit Guidance for Goal-Driven Image Generation: Ruoyu Wang,

Huayang Huang,

Ye Zhu,

Olga Russakovsky,

Yu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ruoyu and Huang, Huayang and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  title     = {The Silent Assistant: {NoiseQuery} as Implicit Guidance for Goal-Driven Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17618--17628}
}
AnyBimanual: Transferring Unimanual Policy for General Bimanual Manipulation: Guanxing Lu,

Tengbo Yu,

Haoyuan Deng,

Season Si Chen,

Yansong Tang,

Ziwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Guanxing and Yu, Tengbo and Deng, Haoyuan and Chen, Season Si and Tang, Yansong and Wang, Ziwei},
  title     = {{AnyBimanual}: Transferring Unimanual Policy for General Bimanual Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13662--13672}
}
FreeFlux: Understanding and Exploiting Layer-Specific Roles in RoPE-Based MMDiT for Versatile Image Editing: Tianyi Wei,

Yifan Zhou,

Dongdong Chen,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Tianyi and Zhou, Yifan and Chen, Dongdong and Pan, Xingang},
  title     = {{FreeFlux}: Understanding and Exploiting Layer-Specific Roles in {RoPE}-Based {MMDiT} for Versatile Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16745--16754}
}
Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing: Joonghyuk Shin,

Alchan Hwang,

Yujin Kim,

Daneul Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Joonghyuk and Hwang, Alchan and Kim, Yujin and Kim, Daneul and Park, Jaesik},
  title     = {Exploring Multimodal Diffusion Transformers for Enhanced Prompt-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19492--19502}
}
IM-LUT: Interpolation Mixing Look-Up Tables for Image Super-Resolution: Sejin Park,

Sangmin Lee,

Kyong Hwan Jin,

Seung-Won Jung; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sejin and Lee, Sangmin and Jin, Kyong Hwan and Jung, Seung-Won},
  title     = {{IM-LUT}: Interpolation Mixing Look-Up Tables for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14317--14325}
}
TeEFusion: Blending Text Embeddings to Distill Classifier-Free Guidance: Minghao Fu,

Guo-Hua Wang,

Xiaohao Chen,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Minghao and Wang, Guo-Hua and Chen, Xiaohao and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu},
  title     = {{TeEFusion}: Blending Text Embeddings to Distill Classifier-Free Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16652--16661}
}
LayerD: Decomposing Raster Graphic Designs into Layers: Tomoyuki Suzuki,

Kang-Jun Liu,

Naoto Inoue,

Kota Yamaguchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Suzuki_2025_ICCV,
  author    = {Suzuki, Tomoyuki and Liu, Kang-Jun and Inoue, Naoto and Yamaguchi, Kota},
  title     = {{LayerD}: Decomposing Raster Graphic Designs into Layers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17783--17792}
}
DexVLG: Dexterous Vision-Language-Grasp Model at Scale: Jiawei He,

Danshi Li,

Xinqiang Yu,

Zekun Qi,

Wenyao Zhang,

Jiayi Chen,

Zhaoxiang Zhang,

Zhizheng Zhang,

Li Yi,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jiawei and Li, Danshi and Yu, Xinqiang and Qi, Zekun and Zhang, Wenyao and Chen, Jiayi and Zhang, Zhaoxiang and Zhang, Zhizheng and Yi, Li and Wang, He},
  title     = {{DexVLG}: Dexterous Vision-Language-Grasp Model at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14248--14258}
}
DeepShield: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis: Yinqi Cai,

Jichang Li,

Zhaolun Li,

Weikai Chen,

Rushi Lan,

Xi Xie,

Xiaonan Luo,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yinqi and Li, Jichang and Li, Zhaolun and Chen, Weikai and Lan, Rushi and Xie, Xi and Luo, Xiaonan and Li, Guanbin},
  title     = {{DeepShield}: Fortifying Deepfake Video Detection with Local and Global Forgery Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12524--12534}
}
Anchor Token Matching: Implicit Structure Locking for Training-free AR Image Editing: Taihang Hu,

Linxuan Li,

Kai Wang,

Yaxing Wang,

Jian Yang,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Taihang and Li, Linxuan and Wang, Kai and Wang, Yaxing and Yang, Jian and Cheng, Ming-Ming},
  title     = {Anchor Token Matching: Implicit Structure Locking for Training-free {AR} Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18166--18176}
}
FedMVP: Federated Multimodal Visual Prompt Tuning for Vision-Language Models: Mainak Singha,

Subhankar Roy,

Sarthak Mehrotra,

Ankit Jha,

Moloud Abdar,

Biplab Banerjee,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singha_2025_ICCV,
  author    = {Singha, Mainak and Roy, Subhankar and Mehrotra, Sarthak and Jha, Ankit and Abdar, Moloud and Banerjee, Biplab and Ricci, Elisa},
  title     = {{FedMVP}: Federated Multimodal Visual Prompt Tuning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17869--17878}
}
TIP-I2V: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation: Wenhao Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenhao and Yang, Yi},
  title     = {{TIP-I2V}: A Million-Scale Real Text and Image Prompt Dataset for Image-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14898--14908}
}
From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers: Jiacheng Liu,

Chang Zou,

Yuanhuiyi Lyu,

Junjie Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiacheng and Zou, Chang and Lyu, Yuanhuiyi and Chen, Junjie and Zhang, Linfeng},
  title     = {From Reusing to Forecasting: Accelerating Diffusion Models with {TaylorSeers}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15853--15863}
}
FICGen: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation: Wenzhuang Wang,

Yifan Zhao,

Mingcan Ma,

Ming Liu,

Zhonglin Jiang,

Yong Chen,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenzhuang and Zhao, Yifan and Ma, Mingcan and Liu, Ming and Jiang, Zhonglin and Chen, Yong and Li, Jia},
  title     = {{FICGen}: Frequency-Inspired Contextual Disentanglement for Layout-driven Degraded Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19097--19107}
}
VACE: All-in-One Video Creation and Editing: Zeyinzi Jiang,

Zhen Han,

Chaojie Mao,

Jingfeng Zhang,

Yulin Pan,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zeyinzi and Han, Zhen and Mao, Chaojie and Zhang, Jingfeng and Pan, Yulin and Liu, Yu},
  title     = {{VACE}: All-in-One Video Creation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17191--17202}
}
ContextFace: Generating Facial Expressions from Emotional Contexts: Min-jung Kim,

Minsang Kim,

Seung Jun Baek; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min-jung and Kim, Minsang and Baek, Seung Jun},
  title     = {{ContextFace}: Generating Facial Expressions from Emotional Contexts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11383--11392}
}
Error Recognition in Procedural Videos using Generalized Task Graph: Shih-Po Lee,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Shih-Po and Elhamifar, Ehsan},
  title     = {Error Recognition in Procedural Videos using Generalized Task Graph},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10009--10021}
}
Reangle-A-Video: 4D Video Generation as Video-to-Video Translation: Hyeonho Jeong,

Suhyeon Lee,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Hyeonho and Lee, Suhyeon and Ye, Jong Chul},
  title     = {{Reangle-A-Video}: {4D} Video Generation as Video-to-Video Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11164--11175}
}
DualReal: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization: Wenchuan Wang,

Mengqi Huang,

Yijing Tu,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenchuan and Huang, Mengqi and Tu, Yijing and Mao, Zhendong},
  title     = {{DualReal}: Adaptive Joint Training for Lossless Identity-Motion Fusion in Video Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16565--16575}
}
LDIP: Long Distance Information Propagation for Video Super-Resolution: Michael Bernasconi,

Abdelaziz Djelouah,

Yang Zhang,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Bernasconi_2025_ICCV,
  author    = {Bernasconi, Michael and Djelouah, Abdelaziz and Zhang, Yang and Gross, Markus and Schroers, Christopher},
  title     = {{LDIP}: Long Distance Information Propagation for Video Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11558--11567}
}
Who Controls the Authorization? Invertible Networks for Copyright Protection in Text-to-Image Synthesis: Baoyue Hu,

Yang Wei,

Junhao Xiao,

Wendong Huang,

Xiuli Bi,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Baoyue and Wei, Yang and Xiao, Junhao and Huang, Wendong and Bi, Xiuli and Xiao, Bin},
  title     = {Who Controls the Authorization? {Invertible} Networks for Copyright Protection in Text-to-Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15832--15841}
}
AnimeGamer: Infinite Anime Life Simulation with Next Game State Prediction: Junhao Cheng,

Yuying Ge,

Yixiao Ge,

Jing Liao,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Junhao and Ge, Yuying and Ge, Yixiao and Liao, Jing and Shan, Ying},
  title     = {{AnimeGamer}: Infinite Anime Life Simulation with Next Game State Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10875--10885}
}
OmniPaint: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting: Yongsheng Yu,

Ziyun Zeng,

Haitian Zheng,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Yongsheng and Zeng, Ziyun and Zheng, Haitian and Luo, Jiebo},
  title     = {{OmniPaint}: Mastering Object-Oriented Editing via Disentangled Insertion-Removal Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17324--17334}
}
A0: An Affordance-Aware Hierarchical Model for General Robotic Manipulation: Rongtao Xu,

Jian Zhang,

Minghao Guo,

Youpeng Wen,

Haoting Yang,

Min Lin,

Jianzheng Huang,

Zhe Li,

Kaidong Zhang,

Liqiong Wang,

Yuxuan Kuang,

Meng Cao,

Feng Zheng,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Rongtao and Zhang, Jian and Guo, Minghao and Wen, Youpeng and Yang, Haoting and Lin, Min and Huang, Jianzheng and Li, Zhe and Zhang, Kaidong and Wang, Liqiong and Kuang, Yuxuan and Cao, Meng and Zheng, Feng and Liang, Xiaodan},
  title     = {A0: An Affordance-Aware Hierarchical Model for General Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13491--13501}
}
X-Prompt: Generalizable Auto-Regressive Visual Learning with In-Context Prompting: Zeyi Sun,

Ziyang Chu,

Pan Zhang,

Tong Wu,

Yuhang Zang,

Xiaoyi Dong,

Yuanjun Xiong,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zeyi and Chu, Ziyang and Zhang, Pan and Wu, Tong and Zang, Yuhang and Dong, Xiaoyi and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{X-Prompt}: Generalizable Auto-Regressive Visual Learning with In-Context Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17268--17280}
}
Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing: Joowon Kim,

Ziseok Lee,

Donghyeon Cho,

Sanghyun Jo,

Yeonsung Jung,

Kyungsu Kim,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Joowon and Lee, Ziseok and Cho, Donghyeon and Jo, Sanghyun and Jung, Yeonsung and Kim, Kyungsu and Yang, Eunho},
  title     = {Early Timestep Zero-Shot Candidate Selection for Instruction-Guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18844--18854}
}
Identity Preserving 3D Head Stylization with Multiview Score Distillation: Bahri Batuhan Bilecen,

Ahmet Berke Gökmen,

Furkan Guzelant,

Aysegul Dundar; [pdf] [supp]
[bibtex]
@InProceedings{Bilecen_2025_ICCV,
  author    = {Bilecen, Bahri Batuhan and G{\"o}kmen, Ahmet Berke and Guzelant, Furkan and Dundar, Aysegul},
  title     = {Identity Preserving {3D} Head Stylization with Multiview Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12169--12179}
}
Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis: Lei-Lei Li,

Jianwu Fang,

Junbin Xiao,

Shanmin Pang,

Hongkai Yu,

Chen Lv,

Jianru Xue,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Lei-Lei and Fang, Jianwu and Xiao, Junbin and Pang, Shanmin and Yu, Hongkai and Lv, Chen and Xue, Jianru and Chua, Tat-Seng},
  title     = {Causal-Entity Reflected Egocentric Traffic Accident Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11208--11218}
}
How Would It Sound? Material-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes: Mahnoor Fatima Saad,

Ziad Al-Halah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saad_2025_ICCV,
  author    = {Saad, Mahnoor Fatima and Al-Halah, Ziad},
  title     = {How Would It Sound? {Material}-Controlled Multimodal Acoustic Profile Generation for Indoor Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12232--12241}
}
Teeth Reconstruction and Performance Capture Using a Phone Camera: Weixi Zheng,

Jingwang Ling,

Zhibo Wang,

Quan Wang,

Feng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Weixi and Ling, Jingwang and Wang, Zhibo and Wang, Quan and Xu, Feng},
  title     = {Teeth Reconstruction and Performance Capture Using a Phone Camera},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {9998--10008}
}
VIGFace: Virtual Identity Generation for Privacy-Free Face Recognition Dataset: Minsoo Kim,

Min-Cheol Sagong,

Gi Pyo Nam,

Junghyun Cho,

Ig-Jae Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Minsoo and Sagong, Min-Cheol and Nam, Gi Pyo and Cho, Junghyun and Kim, Ig-Jae},
  title     = {{VIGFace}: Virtual Identity Generation for Privacy-Free Face Recognition Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10043--10053},
}
MamTiff-CAD: Multi-Scale Latent Diffusion with Mamba+ for Complex Parametric Sequence: Liyuan Deng,

Yunpeng Bai,

Yongkang Dai,

Xiaoshui Huang,

Hongping Gan,

Dongshuo Huang,

Hao Jiacheng,

Yilei Shi; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Liyuan and Bai, Yunpeng and Dai, Yongkang and Huang, Xiaoshui and Gan, Hongping and Huang, Dongshuo and Jiacheng, Hao and Shi, Yilei},
  title     = {{MamTiff-CAD}: Multi-Scale Latent Diffusion with {Mamba+} for Complex Parametric Sequence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10517--10526},
}
ReasonVQA: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering: Duong T. Tran,

Trung-Kien Tran,

Manfred Hauswirth,

Danh Le Phuoc; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Duong T. and Tran, Trung-Kien and Hauswirth, Manfred and Le Phuoc, Danh},
  title     = {{ReasonVQA}: A Multi-hop Reasoning Benchmark with Structural Knowledge for Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18793--18803},
}
Rethinking DPO-style Diffusion Aligning Frameworks: Xun Wu,

Shaohan Huang,

Lingjie Jiang,

Furu Wei; [pdf]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Xun and Huang, Shaohan and Jiang, Lingjie and Wei, Furu},
  title     = {Rethinking {DPO}-style Diffusion Aligning Frameworks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18068--18077},
}
VSC: Visual Search Compositional Text-to-Image Diffusion Model: Do Huu Dat,

Nam Hyeon-Woo,

Po-Yuan Mao,

Tae-Hyun Oh; [pdf] [supp]
[bibtex]
@InProceedings{Dat_2025_ICCV,
  author    = {Dat, Do Huu and Hyeon-Woo, Nam and Mao, Po-Yuan and Oh, Tae-Hyun},
  title     = {{VSC}: Visual Search Compositional Text-to-Image Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19153--19162},
}
PersonalVideo: High ID-Fidelity Video Customization without Dynamic and Semantic Degradation: Hengjia Li,

Haonan Qiu,

Shiwei Zhang,

Xiang Wang,

Yujie Wei,

Zekun Li,

Yingya Zhang,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hengjia and Qiu, Haonan and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Li, Zekun and Zhang, Yingya and Wu, Boxi and Cai, Deng},
  title     = {{PersonalVideo}: High {ID}-Fidelity Video Customization without Dynamic and Semantic Degradation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19406--19416},
}
MP-HSIR: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration: Zhehui Wu,

Yong Chen,

Naoto Yokoya,

Wei He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhehui and Chen, Yong and Yokoya, Naoto and He, Wei},
  title     = {{MP-HSIR}: A Multi-Prompt Framework for Universal Hyperspectral Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13009--13020},
}
Learning A Unified Template for Gait Recognition: Panjian Huang,

Saihui Hou,

Junzhou Huang,

Yongzhen Huang; [pdf]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Panjian and Hou, Saihui and Huang, Junzhou and Huang, Yongzhen},
  title     = {Learning A Unified Template for Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12459--12469},
}
REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder: Yitian Zhang,

Long Mai,

Aniruddha Mahapatra,

David Bourgin,

Yicong Hong,

Jonah Casebeer,

Feng Liu,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yitian and Mai, Long and Mahapatra, Aniruddha and Bourgin, David and Hong, Yicong and Casebeer, Jonah and Liu, Feng and Fu, Yun},
  title     = {{REGEN}: Learning Compact Video Embedding with (Re-)Generative Decoder},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18453--18462},
}
DADet: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection: Hongwei Yu,

Xinlong Ding,

Jiawei Li,

Jinlong Wang,

Yudong Zhang,

Rongquan Wang,

Huimin Ma,

Jiansheng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Hongwei and Ding, Xinlong and Li, Jiawei and Wang, Jinlong and Zhang, Yudong and Wang, Rongquan and Ma, Huimin and Chen, Jiansheng},
  title     = {{DADet}: Safeguarding Image Conditional Diffusion Models against Adversarial and Backdoor Attacks via Diffusion Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17411--17421},
}
Inpaint4Drag: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping: Jingyi Lu,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jingyi and Han, Kai},
  title     = {{Inpaint4Drag}: Repurposing Inpainting Models for Drag-Based Image Editing via Bidirectional Warping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18304--18313},
}
Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models: Zerui Tao,

Yuhta Takida,

Naoki Murata,

Qibin Zhao,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Zerui and Takida, Yuhta and Murata, Naoki and Zhao, Qibin and Mitsufuji, Yuki},
  title     = {Transformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16333--16344},
}
MotionFollower: Editing Video Motion via Score-Guided Diffusion: Shuyuan Tu,

Qi Dai,

Zihao Zhang,

Sicheng Xie,

Zhi-Qi Cheng,

Chong Luo,

Xintong Han,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_ICCV,
  author    = {Tu, Shuyuan and Dai, Qi and Zhang, Zihao and Xie, Sicheng and Cheng, Zhi-Qi and Luo, Chong and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{MotionFollower}: Editing Video Motion via Score-Guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12822--12831},
}
Learning Few-Step Diffusion Models by Trajectory Distribution Matching: Yihong Luo,

Tianyang Hu,

Jiacheng Sun,

Yujun Cai,

Jing Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Yihong and Hu, Tianyang and Sun, Jiacheng and Cai, Yujun and Tang, Jing},
  title     = {Learning Few-Step Diffusion Models by Trajectory Distribution Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17719--17728},
}
Certifiably Optimal Anisotropic Rotation Averaging: Carl Olsson,

Yaroslava Lochman,

Johan Malmport,

Christopher Zach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Olsson_2025_ICCV,
  author    = {Olsson, Carl and Lochman, Yaroslava and Malmport, Johan and Zach, Christopher},
  title     = {Certifiably Optimal Anisotropic Rotation Averaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14856--14865},
}
TeRA: Rethinking Text-guided Realistic 3D Avatar Generation: Yanwen Wang,

Yiyu Zhuang,

Jiawei Zhang,

Li Wang,

Yifei Zeng,

Xun Cao,

Xinxin Zuo,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yanwen and Zhuang, Yiyu and Zhang, Jiawei and Wang, Li and Zeng, Yifei and Cao, Xun and Zuo, Xinxin and Zhu, Hao},
  title     = {{TeRA}: Rethinking Text-guided Realistic {3D} Avatar Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10686--10697},
}
UniGlyph: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis: Yuanrui Wang,

Cong Han,

Yafei Li,

Zhipeng Jin,

Xiawei Li,

SiNan Du,

Wen Tao,

Shuanglong Li,

Yi Yang,

Chun Yuan,

Liu Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuanrui and Han, Cong and Li, Yafei and Jin, Zhipeng and Li, Xiawei and Du, SiNan and Tao, Wen and Li, Shuanglong and Yang, Yi and Yuan, Chun and Lin, Liu},
  title     = {{UniGlyph}: Unified Segmentation-Conditioned Diffusion for Precise Visual Text Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18335--18344},
}
LA-MOTR: End-to-End Multi-Object Tracking by Learnable Association: Peng Wang,

Yongcai Wang,

Hualong Cao,

Wang Chen,

Deying Li; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Peng and Wang, Yongcai and Cao, Hualong and Chen, Wang and Li, Deying},
  title     = {{LA-MOTR}: End-to-End Multi-Object Tracking by Learnable Association},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12438--12448},
}
OmniVTON: Training-Free Universal Virtual Try-On: Zhaotong Yang,

Yuhui Li,

Shengfeng He,

Xinzhe Li,

Yangyang Xu,

Junyu Dong,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhaotong and Li, Yuhui and He, Shengfeng and Li, Xinzhe and Xu, Yangyang and Dong, Junyu and Du, Yong},
  title     = {{OmniVTON}: Training-Free Universal Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16702--16711},
}
InfGen: A Resolution-Agnostic Paradigm for Scalable Image Synthesis: Tao Han,

Wanghan Xu,

Junchao Gong,

Xiaoyu Yue,

Song Guo,

Luping Zhou,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Tao and Xu, Wanghan and Gong, Junchao and Yue, Xiaoyu and Guo, Song and Zhou, Luping and Bai, Lei},
  title     = {{InfGen}: A Resolution-Agnostic Paradigm for Scalable Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17941--17950},
}
PlanGen: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models: Runze He,

Bo Cheng,

Yuhang Ma,

Qingxiang Jia,

Shanyuan Liu,

Ao Ma,

Xiaoyu Wu,

Liebucha Wu,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Runze and Cheng, Bo and Ma, Yuhang and Jia, Qingxiang and Liu, Shanyuan and Ma, Ao and Wu, Xiaoyu and Wu, Liebucha and Leng, Dawei and Yin, Yuhui},
  title     = {{PlanGen}: Towards Unified Layout Planning and Image Generation in Auto-Regressive Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18143--18154},
}
From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations: Anthony Bisulco,

Rahul Ramesh,

Randall Balestriero,

Pratik Chaudhari; [pdf] [supp]
[bibtex]
@InProceedings{Bisulco_2025_ICCV,
  author    = {Bisulco, Anthony and Ramesh, Rahul and Balestriero, Randall and Chaudhari, Pratik},
  title     = {From Linearity to Non-Linearity: How Masked Autoencoders Capture Spatial Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16441--16450},
}
SliderSpace: Decomposing the Visual Capabilities of Diffusion Models: Rohit Gandikota,

Zongze Wu,

Richard Zhang,

David Bau,

Eli Shechtman,

Nick Kolkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gandikota_2025_ICCV,
  author    = {Gandikota, Rohit and Wu, Zongze and Zhang, Richard and Bau, David and Shechtman, Eli and Kolkin, Nick},
  title     = {{SliderSpace}: Decomposing the Visual Capabilities of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15994--16003},
}
TrustMark: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images: Tu Bui,

Shruti Agarwal,

John Collomosse; [pdf] [supp]
[bibtex]
@InProceedings{Bui_2025_ICCV,
  author    = {Bui, Tu and Agarwal, Shruti and Collomosse, John},
  title     = {{TrustMark}: Robust Watermarking and Watermark Removal for Arbitrary Resolution Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18629--18639},
}
TikZero: Zero-Shot Text-Guided Graphics Program Synthesis: Jonas Belouadi,

Eddy Ilg,

Margret Keuper,

Hideki Tanaka,

Masao Utiyama,

Raj Dabre,

Steffen Eger,

Simone Ponzetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Belouadi_2025_ICCV,
  author    = {Belouadi, Jonas and Ilg, Eddy and Keuper, Margret and Tanaka, Hideki and Utiyama, Masao and Dabre, Raj and Eger, Steffen and Ponzetto, Simone},
  title     = {{TikZero}: Zero-Shot Text-Guided Graphics Program Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17793--17806},
}
FreeScale: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion: Haonan Qiu,

Shiwei Zhang,

Yujie Wei,

Ruihang Chu,

Hangjie Yuan,

Xiang Wang,

Yingya Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Haonan and Zhang, Shiwei and Wei, Yujie and Chu, Ruihang and Yuan, Hangjie and Wang, Xiang and Zhang, Yingya and Liu, Ziwei},
  title     = {{FreeScale}: Unleashing the Resolution of Diffusion Models via Tuning-Free Scale Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16893--16903},
}
TokenUnify: Scaling Up Autoregressive Pretraining for Neuron Segmentation: Yinda Chen,

Haoyuan Shi,

Xiaoyu Liu,

Te Shi,

Ruobing Zhang,

Dong Liu,

Zhiwei Xiong,

Feng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yinda and Shi, Haoyuan and Liu, Xiaoyu and Shi, Te and Zhang, Ruobing and Liu, Dong and Xiong, Zhiwei and Wu, Feng},
  title     = {{TokenUnify}: Scaling Up Autoregressive Pretraining for Neuron Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13604--13613},
}
Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution: Hongjun Wang,

Jiyuan Chen,

Zhengwei Yin,

Xuan Song,

Yinqiang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongjun and Chen, Jiyuan and Yin, Zhengwei and Song, Xuan and Zheng, Yinqiang},
  title     = {Not All Degradations Are Equal: A Targeted Feature Denoising Framework for Generalizable Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14152--14161},
}
TLB-VFI: Temporal-Aware Latent Brownian Bridge Diffusion for Video Frame Interpolation: Zonglin Lyu,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Zonglin and Chen, Chen},
  title     = {{TLB-VFI}: Temporal-Aware Latent {Brownian} Bridge Diffusion for Video Frame Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16260--16269},
}
MeshPad: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing: Haoxuan Li,

Ziya Erkoç,

Lei Li,

Daniele Sirigatti,

Vladislav Rosov,

Angela Dai,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Haoxuan and Erko{\c{c}}, Ziya and Li, Lei and Sirigatti, Daniele and Rosov, Vladislav and Dai, Angela and Nie{\ss}ner, Matthias},
  title     = {{MeshPad}: Interactive Sketch-Conditioned Artist-Reminiscent Mesh Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16227--16237},
}
UniCombine: Unified Multi-Conditional Combination with Diffusion Transformer: Haoxuan Wang,

Jinlong Peng,

Qingdong He,

Hao Yang,

Ying Jin,

Jiafu Wu,

Xiaobin Hu,

Yanjie Pan,

Zhenye Gan,

Mingmin Chi,

Bo Peng,

Yabiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoxuan and Peng, Jinlong and He, Qingdong and Yang, Hao and Jin, Ying and Wu, Jiafu and Hu, Xiaobin and Pan, Yanjie and Gan, Zhenye and Chi, Mingmin and Peng, Bo and Wang, Yabiao},
  title     = {{UniCombine}: Unified Multi-Conditional Combination with Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18325--18334},
}
Addressing Text Embedding Leakage in Diffusion-based Image Editing: Sunung Mun,

Jinhwan Nam,

Sunghyun Cho,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mun_2025_ICCV,
  author    = {Mun, Sunung and Nam, Jinhwan and Cho, Sunghyun and Ok, Jungseul},
  title     = {Addressing Text Embedding Leakage in Diffusion-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16451--16460},
}
Sequential Gaussian Avatars with Hierarchical Motion Context: Wangze Xu,

Yifan Zhan,

Zhihang Zhong,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wangze and Zhan, Yifan and Zhong, Zhihang and Sun, Xiao},
  title     = {Sequential {Gaussian} Avatars with Hierarchical Motion Context},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13592--13603},
}
Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection: Dat Nguyen,

Marcella Astrid,

Anis Kacem,

Enjie Ghorbel,

Djamila Aouada; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Dat and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
  title     = {Vulnerability-Aware Spatio-Temporal Learning for Generalizable Deepfake Video Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10786--10796},
}
Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization: Kangle Deng,

Hsueh-Ti Derek Liu,

Yiheng Zhu,

Xiaoxia Sun,

Chong Shang,

Kiran S. Bhat,

Deva Ramanan,

Jun-Yan Zhu,

Maneesh Agrawala,

Tinghui Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Kangle and Liu, Hsueh-Ti Derek and Zhu, Yiheng and Sun, Xiaoxia and Shang, Chong and Bhat, Kiran S. and Ramanan, Deva and Zhu, Jun-Yan and Agrawala, Maneesh and Zhou, Tinghui},
  title     = {Efficient Autoregressive Shape Generation via Octree-Based Adaptive Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11685--11696},
}
Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions: Yuwen Pan,

Rui Sun,

Wangkai Li,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Yuwen and Sun, Rui and Li, Wangkai and Zhang, Tianzhu},
  title     = {Exploring Weather-aware Aggregation and Adaptation for Semantic Segmentation under Adverse Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13952--13962},
}
VEGGIE: Instructional Editing and Reasoning Video Concepts with Grounded Generation: Shoubin Yu,

Difan Liu,

Ziqiao Ma,

Yicong Hong,

Yang Zhou,

Hao Tan,

Joyce Chai,

Mohit Bansal; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Shoubin and Liu, Difan and Ma, Ziqiao and Hong, Yicong and Zhou, Yang and Tan, Hao and Chai, Joyce and Bansal, Mohit},
  title     = {{VEGGIE}: Instructional Editing and Reasoning Video Concepts with Grounded Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15147--15158},
}
From Imitation to Innovation: The Emergence of AI's Unique Artistic Styles and the Challenge of Copyright Protection: Zexi Jia,

Chuanwei Huang,

Yeshuang Zhu,

Hongyan Fei,

Ying Deng,

Zhiqiang Yuan,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Deng, Ying and Yuan, Zhiqiang and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {From Imitation to Innovation: The Emergence of {AI}'s Unique Artistic Styles and the Challenge of Copyright Protection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18980--18989},
}
VPO: Aligning Text-to-Video Generation Models with Prompt Optimization: Jiale Cheng,

Ruiliang Lyu,

Xiaotao Gu,

Xiao Liu,

Jiazheng Xu,

Yida Lu,

Jiayan Teng,

Zhuoyi Yang,

Yuxiao Dong,

Jie Tang,

Hongning Wang,

Minlie Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Jiale and Lyu, Ruiliang and Gu, Xiaotao and Liu, Xiao and Xu, Jiazheng and Lu, Yida and Teng, Jiayan and Yang, Zhuoyi and Dong, Yuxiao and Tang, Jie and Wang, Hongning and Huang, Minlie},
  title     = {{VPO}: Aligning Text-to-Video Generation Models with Prompt Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15636--15645},
}
Versatile Transition Generation with Image-to-Video Diffusion: Zuhao Yang,

Jiahui Zhang,

Yingchen Yu,

Shijian Lu,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zuhao and Zhang, Jiahui and Yu, Yingchen and Lu, Shijian and Bai, Song},
  title     = {Versatile Transition Generation with Image-to-Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16981--16990},
}
SMGDiff: Soccer Motion Generation using Diffusion Probabilistic Models: Hongdi Yang,

Chengyang Li,

Zhenxuan Wu,

Gaozheng Li,

Jingya Wang,

Jingyi Yu,

Zhuo Su,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hongdi and Li, Chengyang and Wu, Zhenxuan and Li, Gaozheng and Wang, Jingya and Yu, Jingyi and Su, Zhuo and Xu, Lan},
  title     = {{SMGDiff}: Soccer Motion Generation using Diffusion Probabilistic Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11807--11817},
}
Text Embedding Knows How to Quantize Text-Guided Diffusion Models: Hongjae Lee,

Myungjun Son,

Dongjea Kang,

Seung-Won Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hongjae and Son, Myungjun and Kang, Dongjea and Jung, Seung-Won},
  title     = {Text Embedding Knows How to Quantize Text-Guided Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15426--15436},
}
GUAVA: Generalizable Upper Body 3D Gaussian Avatar: Dongbin Zhang,

Yunfei Liu,

Lijian Lin,

Ye Zhu,

Yang Li,

Minghan Qin,

Yu Li,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Li, Yang and Qin, Minghan and Li, Yu and Wang, Haoqian},
  title     = {{GUAVA}: Generalizable Upper Body {3D} {Gaussian} Avatar},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14205--14217},
}
FastVAR: Linear Visual Autoregressive Modeling via Cached Token Pruning: Hang Guo,

Yawei Li,

Taolin Zhang,

Jiangshan Wang,

Tao Dai,

Shu-Tao Xia,

Luca Benini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Hang and Li, Yawei and Zhang, Taolin and Wang, Jiangshan and Dai, Tao and Xia, Shu-Tao and Benini, Luca},
  title     = {{FastVAR}: Linear Visual Autoregressive Modeling via Cached Token Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19011--19021},
}
Online Generic Event Boundary Detection: Hyungrok Jung,

Daneul Kim,

Seunggyun Lim,

Jeany Son,

Jonghyun Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Hyungrok and Kim, Daneul and Lim, Seunggyun and Son, Jeany and Choi, Jonghyun},
  title     = {Online Generic Event Boundary Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13741--13750},
}
Decouple to Reconstruct: High Quality UHD Restoration via Active Feature Disentanglement and Reversible Fusion: Yidi Liu,

Dong Li,

Yuxin Ma,

Jie Huang,

Wenlong Zhang,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yidi and Li, Dong and Ma, Yuxin and Huang, Jie and Zhang, Wenlong and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Decouple to Reconstruct: High Quality {UHD} Restoration via Active Feature Disentanglement and Reversible Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11622--11631},
}
Generating Multi-Image Synthetic Data for Text-to-Image Customization: Nupur Kumari,

Xi Yin,

Jun-Yan Zhu,

Ishan Misra,

Samaneh Azadi; [pdf] [arXiv]
[bibtex]
@InProceedings{Kumari_2025_ICCV,
  author    = {Kumari, Nupur and Yin, Xi and Zhu, Jun-Yan and Misra, Ishan and Azadi, Samaneh},
  title     = {Generating Multi-Image Synthetic Data for Text-to-Image Customization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16524--16534},
}
MR-FIQA: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation: Fu-Zhao Ou,

Chongyi Li,

Shiqi Wang,

Sam Kwong; [pdf] [supp]
[bibtex]
@InProceedings{Ou_2025_ICCV,
  author    = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {{MR-FIQA}: Face Image Quality Assessment with Multi-Reference Representations from Synthetic Data Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12915--12925},
}
IDFace: Face Template Protection for Efficient and Secure Identification: Sunpill Kim,

Seunghun Paik,

Chanwoo Hwang,

Dongsoo Kim,

Junbum Shin,

Jae Hong Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Sunpill and Paik, Seunghun and Hwang, Chanwoo and Kim, Dongsoo and Shin, Junbum and Seo, Jae Hong},
  title     = {{IDFace}: Face Template Protection for Efficient and Secure Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13995--14005},
}
D2ST-Adapter: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition: Wenjie Pei,

Qizhong Tan,

Guangming Lu,

Jiandong Tian,

Jun Yu; [pdf] [supp]
[bibtex]
@InProceedings{Pei_2025_ICCV,
  author    = {Pei, Wenjie and Tan, Qizhong and Lu, Guangming and Tian, Jiandong and Yu, Jun},
  title     = {{D2ST-Adapter}: Disentangled-and-Deformable Spatio-Temporal Adapter for Few-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11317--11326},
}
Synchronization of Multiple Videos: Avihai Naaman,

Ron Shapira Weber,

Oren Freifeld; [pdf] [supp]
[bibtex]
@InProceedings{Naaman_2025_ICCV,
  author    = {Naaman, Avihai and Weber, Ron Shapira and Freifeld, Oren},
  title     = {Synchronization of Multiple Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12514--12523},
}
MotionAgent: Fine-grained Controllable Video Generation via Motion Field Agent: Xinyao Liao,

Xianfang Zeng,

Liao Wang,

Gang Yu,

Guosheng Lin,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Xinyao and Zeng, Xianfang and Wang, Liao and Yu, Gang and Lin, Guosheng and Zhang, Chi},
  title     = {{MotionAgent}: Fine-grained Controllable Video Generation via Motion Field Agent},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11305--11316},
}
OpenAnimals: Revisiting Person Re-Identification for Animals Towards Better Generalization: Saihui Hou,

Panjian Huang,

Zengbin Wang,

Yuan Liu,

Zeyu Li,

Man Zhang,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Saihui and Huang, Panjian and Wang, Zengbin and Liu, Yuan and Li, Zeyu and Zhang, Man and Huang, Yongzhen},
  title     = {{OpenAnimals}: Revisiting Person Re-Identification for Animals Towards Better Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14369--14379},
}
VSRM: A Robust Mamba-Based Framework for Video Super-Resolution: Dinh Phu Tran,

Dao Duy Hung,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Dinh Phu and Hung, Dao Duy and Kim, Daeyoung},
  title     = {{VSRM}: A Robust {Mamba}-Based Framework for Video Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14711--14721},
}
EEdit: Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing: Zexuan Yan,

Yue Ma,

Chang Zou,

Wenteng Chen,

Qifeng Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zexuan and Ma, Yue and Zou, Chang and Chen, Wenteng and Chen, Qifeng and Zhang, Linfeng},
  title     = {{EEdit}: Rethinking the Spatial and Temporal Redundancy for Efficient Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17474--17484},
}
SuperEdit: Rectifying and Facilitating Supervision for Instruction-Based Image Editing: Ming Li,

Xin Gu,

Fan Chen,

Xiaoying Xing,

Longyin Wen,

Chen Chen,

Sijie Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ming and Gu, Xin and Chen, Fan and Xing, Xiaoying and Wen, Longyin and Chen, Chen and Zhu, Sijie},
  title     = {{SuperEdit}: Rectifying and Facilitating Supervision for Instruction-Based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19206--19215},
}
HPSv3: Towards Wide-Spectrum Human Preference Score: Yuhang Ma,

Xiaoshi Wu,

Keqiang Sun,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Ma_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Yuhang and Wu, Xiaoshi and Sun, Keqiang and Li, Hongsheng},
  title     = {{HPSv3}: Towards Wide-Spectrum Human Preference Score},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15086--15095},
}
NullSwap: Proactive Identity Cloaking Against Deepfake Face Swapping: Tianyi Wang,

Shuaicheng Niu,

Harry Cheng,

Xiao Zhang,

Yinglong Wang; [pdf]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tianyi and Niu, Shuaicheng and Cheng, Harry and Zhang, Xiao and Wang, Yinglong},
  title     = {{NullSwap}: Proactive Identity Cloaking Against Deepfake Face Swapping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9945--9954},
}
VertexRegen: Mesh Generation with Continuous Level of Detail: Xiang Zhang,

Yawar Siddiqui,

Armen Avetisyan,

Chris Xie,

Jakob Engel,

Henry Howard-Jenkins; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiang and Siddiqui, Yawar and Avetisyan, Armen and Xie, Chris and Engel, Jakob and Howard-Jenkins, Henry},
  title     = {{VertexRegen}: Mesh Generation with Continuous Level of Detail},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12570--12580},
}
FontAnimate: High Quality Few-shot Font Generation via Animating Font Transfer Process: Bin Fu,

Zixuan Wang,

Kainan Yan,

Shitian Zhao,

Qi Qin,

Jie Wen,

Junjun He,

Peng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Bin and Wang, Zixuan and Yan, Kainan and Zhao, Shitian and Qin, Qi and Wen, Jie and He, Junjun and Gao, Peng},
  title     = {{FontAnimate}: High Quality Few-shot Font Generation via Animating Font Transfer Process},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16015--16025},
}
GenFlowRL: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning: Kelin Yu,

Sheng Zhang,

Harshit Soora,

Furong Huang,

Heng Huang,

Pratap Tokekar,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Kelin and Zhang, Sheng and Soora, Harshit and Huang, Furong and Huang, Heng and Tokekar, Pratap and Gao, Ruohan},
  title     = {{GenFlowRL}: Shaping Rewards with Generative Object-Centric Flow in Visual Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13183--13192},
}
MAVFlow: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot AV2AV Multilingual Translation: Sungwoo Cho,

Jeongsoo Choi,

Sungnyun Kim,

Se-Young Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Sungwoo and Choi, Jeongsoo and Kim, Sungnyun and Yun, Se-Young},
  title     = {{MAVFlow}: Preserving Paralinguistic Elements with Conditional Flow Matching for Zero-Shot {AV2AV} Multilingual Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13151--13161},
}
Video Color Grading via Look-Up Table Generation: Seunghyun Shin,

Dongmin Shin,

Jisu Shin,

Hae-Gon Jeon,

Joon-Young Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Seunghyun and Shin, Dongmin and Shin, Jisu and Jeon, Hae-Gon and Lee, Joon-Young},
  title     = {Video Color Grading via Look-Up Table Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19141--19152},
}
DC-AR: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer: Yecheng Wu,

Han Cai,

Junyu Chen,

Zhuoyang Zhang,

Enze Xie,

Jincheng Yu,

Junsong Chen,

Jinyi Hu,

Yao Lu,

Song Han; [pdf] [supp]
[bibtex]
% NOTE(review): key Wu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yecheng and Cai, Han and Chen, Junyu and Zhang, Zhuoyang and Xie, Enze and Yu, Jincheng and Chen, Junsong and Hu, Jinyi and Lu, Yao and Han, Song},
  title     = {{DC-AR}: Efficient Masked Autoregressive Image Generation with Deep Compression Hybrid Tokenizer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18034--18045},
}
BVINet: Unlocking Blind Video Inpainting with Zero Annotations: Zhiliang Wu,

Kerui Chen,

Kun Li,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhiliang and Chen, Kerui and Li, Kun and Fan, Hehe and Yang, Yi},
  title     = {{BVINet}: Unlocking Blind Video Inpainting with Zero Annotations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14017--14027},
}
Video-T1: Test-time Scaling for Video Generation: Fangfu Liu,

Hanyang Wang,

Yimo Cai,

Kaiyan Zhang,

Xiaohang Zhan,

Yueqi Duan; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Fangfu and Wang, Hanyang and Cai, Yimo and Zhang, Kaiyan and Zhan, Xiaohang and Duan, Yueqi},
  title     = {{Video-T1}: Test-time Scaling for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18671--18681},
}
MemoryTalker: Personalized Speech-Driven 3D Facial Animation via Audio-Guided Stylization: Hyung Kyu Kim,

Sangmin Lee,

Hak Gu Kim; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Kim_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyung Kyu and Lee, Sangmin and Kim, Hak Gu},
  title     = {{MemoryTalker}: Personalized Speech-Driven {3D} Facial Animation via Audio-Guided Stylization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11241--11251},
}
Text2Outfit: Controllable Outfit Generation with Multimodal Language Models: Yuanhao Zhai,

Yen-Liang Lin,

Minxu Peng,

Larry S. Davis,

Ashwin Chandramouli,

Junsong Yuan,

David Doermann; [pdf] [supp]
[bibtex]
@InProceedings{Zhai_2025_ICCV,
  author    = {Zhai, Yuanhao and Lin, Yen-Liang and Peng, Minxu and Davis, Larry S. and Chandramouli, Ashwin and Yuan, Junsong and Doermann, David},
  title     = {{Text2Outfit}: Controllable Outfit Generation with Multimodal Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16165--16174},
}
HairCUP: Hair Compositional Universal Prior for 3D Gaussian Avatars: Byungjun Kim,

Shunsuke Saito,

Giljoo Nam,

Tomas Simon,

Jason Saragih,

Hanbyul Joo,

Junxuan Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Kim_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Byungjun and Saito, Shunsuke and Nam, Giljoo and Simon, Tomas and Saragih, Jason and Joo, Hanbyul and Li, Junxuan},
  title     = {{HairCUP}: Hair Compositional Universal Prior for {3D} {Gaussian} Avatars},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9966--9976},
}
Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking: Qiangqiang Wu,

Yi Yu,

Chenqi Kong,

Ziquan Liu,

Jia Wan,

Haoliang Li,

Alex C. Kot,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qiangqiang and Yu, Yi and Kong, Chenqi and Liu, Ziquan and Wan, Jia and Li, Haoliang and Kot, Alex C. and Chan, Antoni B.},
  title     = {Temporal Unlearnable Examples: Preventing Personal Video Data from Unauthorized Exploitation by Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11110--11121},
}
StyleKeeper: Prevent Content Leakage using Negative Visual Query Guidance: Jaeseok Jeong,

Junho Kim,

Gayoung Lee,

Yunjey Choi,

Youngjung Uh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Jaeseok and Kim, Junho and Lee, Gayoung and Choi, Yunjey and Uh, Youngjung},
  title     = {{StyleKeeper}: Prevent Content Leakage using Negative Visual Query Guidance},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15760--15769},
}
Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation: Zheng Gao,

Jifei Song,

Zhensong Zhang,

Jiankang Deng,

Ioannis Patras; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zheng and Song, Jifei and Zhang, Zhensong and Deng, Jiankang and Patras, Ioannis},
  title     = {Frequency-Guided Diffusion for Training-Free Text-Driven Image Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19195--19205},
}
MotionShot: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation: Yanchen Liu,

Yanan Sun,

Zhening Xing,

Junyao Gao,

Kai Chen,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yanchen and Sun, Yanan and Xing, Zhening and Gao, Junyao and Chen, Kai and Pei, Wenjie},
  title     = {{MotionShot}: Adaptive Motion Transfer across Arbitrary Objects for Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11861--11871},
}
STIV: Scalable Text and Image Conditioned Video Generation: Zongyu Lin,

Wei Liu,

Chen Chen,

Jiasen Lu,

Wenze Hu,

Tsu-Jui Fu,

Jesse Allardice,

Zhengfeng Lai,

Liangchen Song,

Bowen Zhang,

Cha Chen,

Yiran Fei,

Lezhi Li,

Yinfei Yang,

Yizhou Sun,

Kai-Wei Chang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lin_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zongyu and Liu, Wei and Chen, Chen and Lu, Jiasen and Hu, Wenze and Fu, Tsu-Jui and Allardice, Jesse and Lai, Zhengfeng and Song, Liangchen and Zhang, Bowen and Chen, Cha and Fei, Yiran and Li, Lezhi and Yang, Yinfei and Sun, Yizhou and Chang, Kai-Wei},
  title     = {{STIV}: Scalable Text and Image Conditioned Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16249--16259},
}
Repurposing 2D Diffusion Models with Gaussian Atlas for 3D Generation: Tiange Xiang,

Kai Li,

Chengjiang Long,

Christian Häne,

Peihong Guo,

Scott Delp,

Ehsan Adeli,

Li Fei-Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Tiange and Li, Kai and Long, Chengjiang and H{\"a}ne, Christian and Guo, Peihong and Delp, Scott and Adeli, Ehsan and Fei-Fei, Li},
  title     = {Repurposing {2D} Diffusion Models with {Gaussian} Atlas for {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16492--16502},
}
Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks: Bhishma Dedhia,

David Bourgin,

Krishna Kumar Singh,

Yuheng Li,

Yan Kang,

Zhan Xu,

Niraj K. Jha,

Yuchen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dedhia_2025_ICCV,
  author    = {Dedhia, Bhishma and Bourgin, David and Singh, Krishna Kumar and Li, Yuheng and Kang, Yan and Xu, Zhan and Jha, Niraj K. and Liu, Yuchen},
  title     = {Generating, Fast and Slow: Scalable Parallel Video Generation with Video Interface Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15385--15394},
}
Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction: Dat Nguyen Cong,

Hieu Tran Bao,

Tung Hoang-Thanh; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2025_ICCV,
  author    = {Cong, Dat Nguyen and Bao, Hieu Tran and Hoang-Thanh, Tung},
  title     = {Guiding Noisy Label Conditional Diffusion Models with Score-based Discriminator Correction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18531--18541},
}
IAP: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization: Subrat Kishore Dutta,

Xiao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutta_2025_ICCV,
  author    = {Dutta, Subrat Kishore and Zhang, Xiao},
  title     = {{IAP}: Invisible Adversarial Patch Attack through Perceptibility-Aware Localization and Perturbation Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14766--14775},
}
InterSyn: Interleaved Learning for Dynamic Motion Synthesis in the Wild: Yiyi Ma,

Yuanzhi Liang,

Xiu Li,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Ma_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Yiyi and Liang, Yuanzhi and Li, Xiu and Zhang, Chi and Li, Xuelong},
  title     = {{InterSyn}: Interleaved Learning for Dynamic Motion Synthesis in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12832--12841},
}
SegmentDreamer: Towards High-fidelity Text-to-3D Synthesis with Segmented Consistency Trajectory Distillation: Jiahao Zhu,

Zixuan Chen,

Guangcong Wang,

Xiaohua Xie,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiahao and Chen, Zixuan and Wang, Guangcong and Xie, Xiaohua and Zhou, Yi},
  title     = {{SegmentDreamer}: Towards High-fidelity Text-to-{3D} Synthesis with Segmented Consistency Trajectory Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15864--15874},
}
MaTe: Images Are All You Need for Material Transfer via Diffusion Transformer: Nisha Huang,

Henglin Liu,

Yizhou Lin,

Kaer Huang,

Chubin Chen,

Jie Guo,

Tong-yee Lee,

Xiu Li; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Nisha and Liu, Henglin and Lin, Yizhou and Huang, Kaer and Chen, Chubin and Guo, Jie and Lee, Tong-yee and Li, Xiu},
  title     = {{MaTe}: Images Are All You Need for Material Transfer via Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15117--15126},
}
STAR: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution: Rui Xie,

Yinhong Liu,

Penghao Zhou,

Chen Zhao,

Jun Zhou,

Kai Zhang,

Zhenyu Zhang,

Jian Yang,

Zhenheng Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Rui and Liu, Yinhong and Zhou, Penghao and Zhao, Chen and Zhou, Jun and Zhang, Kai and Zhang, Zhenyu and Yang, Jian and Yang, Zhenheng and Tai, Ying},
  title     = {{STAR}: Spatial-Temporal Augmentation with Text-to-Video Models for Real-World Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17108--17118},
}
V.I.P. : Iterative Online Preference Distillation for Efficient Video Diffusion Models: Jisoo Kim,

Wooseok Seo,

Junwan Kim,

Seungho Park,

Sooyeon Park,

Youngjae Yu; [pdf] [supp]
[bibtex]
% NOTE(review): key Kim_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jisoo and Seo, Wooseok and Kim, Junwan and Park, Seungho and Park, Sooyeon and Yu, Youngjae},
  title     = {{V.I.P.} : Iterative Online Preference Distillation for Efficient Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17235--17245},
}
Domain Generalizable Portrait Style Transfer: Xinbo Wang,

Wenju Xu,

Qing Zhang,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xinbo and Xu, Wenju and Zhang, Qing and Zheng, Wei-Shi},
  title     = {Domain Generalizable Portrait Style Transfer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15802--15811},
}
Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration: Shihao Zhou,

Dayu Li,

Jinshan Pan,

Juncheng Zhou,

Jinglei Shi,

Jufeng Yang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Zhou_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Shihao and Li, Dayu and Pan, Jinshan and Zhou, Juncheng and Shi, Jinglei and Yang, Jufeng},
  title     = {Devil is in the Uniformity: Exploring Diverse Learners within Transformer for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12307--12317},
}
GDKVM: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule: Rui Wang,

Yimu Sun,

Jingxing Guo,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Rui and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing},
  title     = {{GDKVM}: Echocardiography Video Segmentation via Spatiotemporal Key-Value Memory with Gated Delta Rule},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12191--12200},
}
DanceEditor: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions: Hengyuan Zhang,

Zhe Li,

Xingqun Qi,

Mengze Li,

Muyi Sun,

Siye Wang,

Man Zhang,

Sirui Han; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hengyuan and Li, Zhe and Qi, Xingqun and Li, Mengze and Sun, Muyi and Wang, Siye and Zhang, Man and Han, Sirui},
  title     = {{DanceEditor}: Towards Iterative Editable Music-driven Dance Generation with Open-Vocabulary Descriptions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12158--12168},
}
AnimalClue: Recognizing Animals by their Traces: Risa Shinoda,

Nakamasa Inoue,

Iro Laina,

Christian Rupprecht,

Hirokatsu Kataoka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinoda_2025_ICCV,
  author    = {Shinoda, Risa and Inoue, Nakamasa and Laina, Iro and Rupprecht, Christian and Kataoka, Hirokatsu},
  title     = {{AnimalClue}: Recognizing Animals by their Traces},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14776--14786},
}
LazyMAR: Accelerating Masked Autoregressive Models via Feature Caching: Feihong Yan,

Qingyan Wei,

Jiayi Tang,

Jiajun Li,

Yulin Wang,

Xuming Hu,

Huiqi Li,

Linfeng Zhang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Yan_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Feihong and Wei, Qingyan and Tang, Jiayi and Li, Jiajun and Wang, Yulin and Hu, Xuming and Li, Huiqi and Zhang, Linfeng},
  title     = {{LazyMAR}: Accelerating Masked Autoregressive Models via Feature Caching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15552--15561},
}
Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation: Shengqi Liu,

Yuhao Cheng,

Zhuo Chen,

Xingyu Ren,

Wenhan Zhu,

Lincheng Li,

Mengxiao Bi,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shengqi and Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Li, Lincheng and Bi, Mengxiao and Yang, Xiaokang and Yan, Yichao},
  title     = {Multimodal Latent Diffusion Model for Complex Sewing Pattern Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17640--17650},
}
GGTalker: Talking Head Systhesis with Generalizable Gaussian Priors and Identity-Specific Adaptation: Wentao Hu,

Shunkai Li,

Ziqiao Peng,

Haoxian Zhang,

Fan Shi,

Xiaoqiang Liu,

Pengfei Wan,

Di Zhang,

Hui Tian; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the spelling "Systhesis" is kept exactly as it appears in this listing; confirm against the published title before changing it.
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Wentao and Li, Shunkai and Peng, Ziqiao and Zhang, Haoxian and Shi, Fan and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Tian, Hui},
  title     = {{GGTalker}: Talking Head Systhesis with Generalizable {Gaussian} Priors and Identity-Specific Adaptation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10108--10117},
}
Make Me Happier: Evoking Emotions Through Image Diffusion Models: Qing Lin,

Jingfeng Zhang,

Yew-Soon Ong,

Mengmi Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lin_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Qing and Zhang, Jingfeng and Ong, Yew-Soon and Zhang, Mengmi},
  title     = {Make Me Happier: Evoking Emotions Through Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16367--16376},
}
DiTFastAttnV2: Head-wise Attention Compression for Multi-Modality Diffusion Transformers: Hanling Zhang,

Rundong Su,

Zhihang Yuan,

Pengtao Chen,

Mingzhu Shen,

Yibo Fan,

Shengen Yan,

Guohao Dai,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hanling and Su, Rundong and Yuan, Zhihang and Chen, Pengtao and Shen, Mingzhu and Fan, Yibo and Yan, Shengen and Dai, Guohao and Wang, Yu},
  title     = {{DiTFastAttnV2}: Head-wise Attention Compression for Multi-Modality Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16399--16409},
}
GAS: Generative Avatar Synthesis from a Single Image: Yixing Lu,

Junting Dong,

Youngjoong Kwon,

Qin Zhao,

Bo Dai,

Fernando De la Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yixing and Dong, Junting and Kwon, Youngjoong and Zhao, Qin and Dai, Bo and De la Torre, Fernando},
  title     = {{GAS}: Generative Avatar Synthesis from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12883--12893},
}
GenM3: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation: Junyu Shi,

Lijiang Liu,

Yong Sun,

Zhiyuan Zhang,

Jinni Zhou,

Qiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Junyu and Liu, Lijiang and Sun, Yong and Zhang, Zhiyuan and Zhou, Jinni and Nie, Qiang},
  title     = {{GenM3}: Generative Pretrained Multi-path Motion Model for Text Conditional Human Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13129--13139},
}
OCR Hinders RAG: Evaluating the Cascading Impact of OCR on Retrieval-Augmented Generation: Junyuan Zhang,

Qintong Zhang,

Bin Wang,

Linke Ouyang,

Zichen Wen,

Ying Li,

Ka-Ho Chow,

Conghui He,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Junyuan and Zhang, Qintong and Wang, Bin and Ouyang, Linke and Wen, Zichen and Li, Ying and Chow, Ka-Ho and He, Conghui and Zhang, Wentao},
  title     = {{OCR} Hinders {RAG}: Evaluating the Cascading Impact of {OCR} on Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17443--17453},
}
Ponimator: Unfolding Interactive Pose for Versatile Human-human Interaction Animation: Shaowei Liu,

Chuan Guo,

Bing Zhou,

Jian Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shaowei and Guo, Chuan and Zhou, Bing and Wang, Jian},
  title     = {{Ponimator}: Unfolding Interactive Pose for Versatile Human-human Interaction Animation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12068--12077},
}
Streamlining Image Editing with Layered Diffusion Brushes: Peyman Gholami,

Robert Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gholami_2025_ICCV,
  author    = {Gholami, Peyman and Xiao, Robert},
  title     = {Streamlining Image Editing with Layered Diffusion Brushes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17368--17378},
}
ObjectMate: A Recurrence Prior for Object Insertion and Subject-Driven Generation: Daniel Winter,

Asaf Shul,

Matan Cohen,

Dana Berman,

Yael Pritch,

Alex Rav-Acha,

Yedid Hoshen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Winter_2025_ICCV,
  author    = {Winter, Daniel and Shul, Asaf and Cohen, Matan and Berman, Dana and Pritch, Yael and Rav-Acha, Alex and Hoshen, Yedid},
  title     = {{ObjectMate}: A Recurrence Prior for Object Insertion and Subject-Driven Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16281--16291},
}
CycleVAR: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation: Yi Liu,

Shengqian Li,

Zuzeng Lin,

Feng Wang,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yi and Li, Shengqian and Lin, Zuzeng and Wang, Feng and Liu, Si},
  title     = {{CycleVAR}: Repurposing Autoregressive Model for Unsupervised One-Step Image Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15194--15204},
}
Synthetic Video Enhances Physical Fidelity in Video Synthesis: Qi Zhao,

Xingyu Ni,

Ziyu Wang,

Feng Cheng,

Ziyan Yang,

Lu Jiang,

Bohan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Qi and Ni, Xingyu and Wang, Ziyu and Cheng, Feng and Yang, Ziyan and Jiang, Lu and Wang, Bohan},
  title     = {Synthetic Video Enhances Physical Fidelity in Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12135--12146},
}
Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter: Jianhui Zhang,

Shen Cheng,

Qirui Sun,

Jia Liu,

Wang Luyang,

Chaoyu Feng,

Chen Fang,

Lei Lei,

Jue Wang,

Shuaicheng Liu; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jianhui and Cheng, Shen and Sun, Qirui and Liu, Jia and Luyang, Wang and Feng, Chaoyu and Fang, Chen and Lei, Lei and Wang, Jue and Liu, Shuaicheng},
  title     = {Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16991--17000},
}
OURO: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding: Tianrun Xu,

Guanyu Chen,

Ye Li,

Yuxin Xi,

Zeyu Mu,

Ruichen Wang,

Tianren Zhang,

Haichuan Gao,

Feng Chen; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Tianrun and Chen, Guanyu and Li, Ye and Xi, Yuxin and Mu, Zeyu and Wang, Ruichen and Zhang, Tianren and Gao, Haichuan and Chen, Feng},
  title     = {{OURO}: A Self-Bootstrapped Framework for Enhancing Multimodal Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18240--18251},
}
DiGA3D: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile 3D Inpainting: Jingyi Pan,

Dan Xu,

Qiong Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Jingyi and Xu, Dan and Luo, Qiong},
  title     = {{DiGA3D}: Coarse-to-Fine Diffusional Propagation of Geometry and Appearance for Versatile {3D} Inpainting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16345--16355},
}
Text-to-Any-Skeleton Motion Generation Without Retargeting: Qingyuan Liu,

Ke Lv,

Kun Dong,

Jian Xue,

Zehai Niu,

Jinbao Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Qingyuan and Lv, Ke and Dong, Kun and Xue, Jian and Niu, Zehai and Wang, Jinbao},
  title     = {Text-to-Any-Skeleton Motion Generation Without Retargeting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12926--12936},
}
EVDM: Event-based Real-world Video Deblurring with Mamba: Zhijing Sun,

Senyan Xu,

Kean Liu,

Runze Tian,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zhijing and Xu, Senyan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {{EVDM}: Event-based Real-world Video Deblurring with {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13793--13803},
}
FaceXFormer: A Unified Transformer for Facial Analysis: Kartik Narayan,

Vibashan VS,

Rama Chellappa,

Vishal M. Patel; [pdf] [arXiv]
[bibtex]
@InProceedings{Narayan_2025_ICCV,
  author    = {Narayan, Kartik and VS, Vibashan and Chellappa, Rama and Patel, Vishal M.},
  title     = {{FaceXFormer}: A Unified Transformer for Facial Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11369--11382},
}
InstantEdit: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow: Yiming Gong,

Zhen Zhu,

Minjia Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Yiming and Zhu, Zhen and Zhang, Minjia},
  title     = {{InstantEdit}: Text-Guided Few-Step Image Editing with Piecewise Rectified Flow},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16808--16817},
}
The Source Image is the Best Attention for Infrared and Visible Image Fusion: Song Wang,

Xie Han,

Liqun Kuang,

Boying Wang,

Zhongyu Chen,

Zherui Qiao,

Fan Yang,

Xiaoxia Liu,

Bingyu Zhang,

Zhixun Wang; [pdf]
[bibtex]
% NOTE(review): key Wang_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Song and Han, Xie and Kuang, Liqun and Wang, Boying and Chen, Zhongyu and Qiao, Zherui and Yang, Fan and Liu, Xiaoxia and Zhang, Bingyu and Wang, Zhixun},
  title     = {The Source Image is the Best Attention for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13513--13522},
}
Towards Immersive Human-X Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis: Kaiyang Ji,

Ye Shi,

Zichen Jin,

Kangyi Chen,

Lan Xu,

Yuexin Ma,

Jingyi Yu,

Jingya Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Kaiyang and Shi, Ye and Jin, Zichen and Chen, Kangyi and Xu, Lan and Ma, Yuexin and Yu, Jingyi and Wang, Jingya},
  title     = {Towards Immersive {Human-X} Interaction: A Real-Time Framework for Physically Plausible Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10173--10183},
}
Generative Video Bi-flow: Chen Liu,

Tobias Ritschel; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_ICCV is shared by other entries in this listing; give it a unique suffix before merging into one .bib file.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chen and Ritschel, Tobias},
  title     = {Generative Video Bi-flow},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19363--19372},
}
FED-PsyAU: Privacy-Preserving Micro-Expression Recognition via Psychological AU Coordination and Dynamic Facial Motion Modeling: Jingting Li,

Yu Qian,

Lin Zhao,

Su-Jing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jingting and Qian, Yu and Zhao, Lin and Wang, Su-Jing},
  title     = {{FED-PsyAU}: Privacy-Preserving Micro-Expression Recognition via Psychological {AU} Coordination and Dynamic Facial Motion Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14453--14463},
}
Unified Video Generation via Next-Set Prediction in Continuous Domain: Zhanzhou Feng,

Qingpei Guo,

Xinyu Xiao,

Ruihan Xu,

Ming Yang,

Shiliang Zhang; [pdf]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Zhanzhou and Guo, Qingpei and Xiao, Xinyu and Xu, Ruihan and Yang, Ming and Zhang, Shiliang},
  title     = {Unified Video Generation via Next-Set Prediction in Continuous Domain},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19427--19438},
}
Latent-Reframe: Enabling Camera Control for Video Diffusion Models without Training: Zhenghong Zhou,

Jie An,

Jiebo Luo; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhenghong and An, Jie and Luo, Jiebo},
  title     = {{Latent-Reframe}: Enabling Camera Control for Video Diffusion Models without Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12779--12789},
}
EditCLIP: Representation Learning for Image Editing: Qian Wang,

Aleksandar Cvejić,

Abdelrahman Eldesokey,

Peter Wonka; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Qian and Cveji{\'c}, Aleksandar and Eldesokey, Abdelrahman and Wonka, Peter},
  title     = {{EditCLIP}: Representation Learning for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15960--15970},
}
Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images: Elena Buglakova,

Anwai Archit,

Edoardo D'Imprima,

Julia Mahamid,

Constantin Pape,

Anna Kreshuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Buglakova_2025_ICCV,
  author    = {Buglakova, Elena and Archit, Anwai and D'Imprima, Edoardo and Mahamid, Julia and Pape, Constantin and Kreshuk, Anna},
  title     = {Tiling artifacts and trade-offs of feature normalization in the segmentation of large biological images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13109--13118},
}
RoboAnnotatorX: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration: Longxin Kou,

Fei Ni,

Yan Zheng,

Peilong Han,

Jinyi Liu,

Haiqin Cui,

Rui Liu,

Jianye Hao; [pdf] [supp]
[bibtex]
@InProceedings{Kou_2025_ICCV,
  author    = {Kou, Longxin and Ni, Fei and Zheng, Yan and Han, Peilong and Liu, Jinyi and Cui, Haiqin and Liu, Rui and Hao, Jianye},
  title     = {{RoboAnnotatorX}: A Comprehensive and Universal Annotation Framework for Accurate Understanding of Long-horizon Robot Demonstration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10353--10363},
}
On-Device Diffusion Transformer Policy for Efficient Robot Manipulation: Yiming Wu,

Huan Wang,

Zhenghao Chen,

Jianxin Pang,

Dong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yiming and Wang, Huan and Chen, Zhenghao and Pang, Jianxin and Xu, Dong},
  title     = {On-Device Diffusion Transformer Policy for Efficient Robot Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14073--14083},
}
Training-Free Text-Guided Image Editing with Visual Autoregressive Model: Yufei Wang,

Lanqing Guo,

Zhihao Li,

Jiaxing Huang,

Pichao Wang,

Bihan Wen,

Jian Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yufei and Guo, Lanqing and Li, Zhihao and Huang, Jiaxing and Wang, Pichao and Wen, Bihan and Wang, Jian},
  title     = {Training-Free Text-Guided Image Editing with Visual Autoregressive Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17577--17586},
}
RealGeneral: Unifying Visual Generation via Temporal In-Context Learning with Video Models: Yijing Lin,

Mengqi Huang,

Shuhan Zhuang,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Yijing and Huang, Mengqi and Zhuang, Shuhan and Mao, Zhendong},
  title     = {{RealGeneral}: Unifying Visual Generation via Temporal In-Context Learning with Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14994--15004},
}
Fine-structure Preserved Real-world Image Super-resolution via Transfer VAE Training: Qiaosi Yi,

Shuai Li,

Rongyuan Wu,

Lingchen Sun,

Yuhui Wu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Wu, Yuhui and Zhang, Lei},
  title     = {Fine-structure Preserved Real-world Image Super-resolution via Transfer {VAE} Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12415--12426},
}
Augmented Mass-Spring Model for Real-Time Dense Hair Simulation: J. Alejandro Amador H.,

Yi Zhou,

Xin Sun,

Zhixin Shu,

Chengan He,

Soren Pirk,

Dominik L. Michels; [pdf] [supp]
[bibtex]
@InProceedings{H._2025_ICCV,
  author    = {{Amador H.}, J. Alejandro and Zhou, Yi and Sun, Xin and Shu, Zhixin and He, Chengan and Pirk, Soren and Michels, Dominik L.},
  title     = {Augmented Mass-Spring Model for Real-Time Dense Hair Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11339--11347},
}
IQA-Adapter: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models: Khaled Abud,

Sergey Lavrushkin,

Alexey Kirillov,

Dmitriy Vatolin; [pdf] [supp]
[bibtex]
@InProceedings{Abud_2025_ICCV,
  author    = {Abud, Khaled and Lavrushkin, Sergey and Kirillov, Alexey and Vatolin, Dmitriy},
  title     = {{IQA-Adapter}: Exploring Knowledge Transfer from Image Quality Assessment to Diffusion-based Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15469--15480},
}
RomanTex: Decoupling 3D-aware Rotary Positional Embedded Multi-Attention Network for Texture Synthesis: Yifei Feng,

Mingxin Yang,

Shuhui Yang,

Sheng Zhang,

Jiaao Yu,

Zibo Zhao,

Yuhong Liu,

Jie Jiang,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yifei and Yang, Mingxin and Yang, Shuhui and Zhang, Sheng and Yu, Jiaao and Zhao, Zibo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao},
  title     = {{RomanTex}: Decoupling {3D-aware} Rotary Positional Embedded Multi-Attention Network for Texture Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17203--17213},
}
Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment: Lijie Liu,

Tianxiang Ma,

Bingchuan Li,

Zhuowei Chen,

Jiawei Liu,

Gen Li,

Siyu Zhou,

Qian He,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Lijie and Ma, Tianxiang and Li, Bingchuan and Chen, Zhuowei and Liu, Jiawei and Li, Gen and Zhou, Siyu and He, Qian and Wu, Xinglong},
  title     = {Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14951--14961},
}
IFAdapter: Instance Feature Control for Grounded Text-to-Image Generation: Yinwei Wu,

Xianpan Zhou,

Bing Ma,

Xuefeng Su,

Kai Ma,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yinwei and Zhou, Xianpan and Ma, Bing and Su, Xuefeng and Ma, Kai and Wang, Xinchao},
  title     = {{IFAdapter}: Instance Feature Control for Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15949--15959},
}
DIMO: Diverse 3D Motion Generation for Arbitrary Objects: Linzhan Mou,

Jiahui Lei,

Chen Wang,

Lingjie Liu,

Kostas Daniilidis; [pdf] [supp]
[bibtex]
@InProceedings{Mou_2025_ICCV,
  author    = {Mou, Linzhan and Lei, Jiahui and Wang, Chen and Liu, Lingjie and Daniilidis, Kostas},
  title     = {{DIMO}: Diverse {3D} Motion Generation for Arbitrary Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14357--14368},
}
MMAD: Multi-label Micro-Action Detection in Videos: Kun Li,

Pengyu Liu,

Dan Guo,

Fei Wang,

Zhiliang Wu,

Hehe Fan,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Kun and Liu, Pengyu and Guo, Dan and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Wang, Meng},
  title     = {{MMAD}: Multi-label Micro-Action Detection in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13225--13236},
}
Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources: Alakh Desai,

Nuno Vasconcelos; [pdf] [supp]
[bibtex]
@InProceedings{Desai_2025_ICCV,
  author    = {Desai, Alakh and Vasconcelos, Nuno},
  title     = {Guiding Diffusion Models with Adaptive Negative Sampling Without External Resources},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16122--16131},
}
AlignGuard: Scalable Safety Alignment for Text-to-Image Generation: Runtao Liu,

I Chieh Chen,

Jindong Gu,

Jipeng Zhang,

Renjie Pi,

Qifeng Chen,

Philip Torr,

Ashkan Khakzar,

Fabio Pizzati; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Runtao and Chen, I Chieh and Gu, Jindong and Zhang, Jipeng and Pi, Renjie and Chen, Qifeng and Torr, Philip and Khakzar, Ashkan and Pizzati, Fabio},
  title     = {{AlignGuard}: Scalable Safety Alignment for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17024--17034},
}
Function-centric Bayesian Network for Zero-Shot Object Goal Navigation: Sixian Zhang,

Xinyao Yu,

Xinhang Song,

Yiyao Wang,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Yiyao and Jiang, Shuqiang},
  title     = {Function-centric {Bayesian} Network for Zero-Shot Object Goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19535--19545},
}
Preserve Anything: Controllable Image Synthesis with Object Preservation: Prasen Kumar Sharma,

Neeraj Matiyali,

Siddharth Srivastava,

Gaurav Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2025_ICCV,
  author    = {Sharma, Prasen Kumar and Matiyali, Neeraj and Srivastava, Siddharth and Sharma, Gaurav},
  title     = {Preserve Anything: Controllable Image Synthesis with Object Preservation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18058--18067},
}
GIViC: Generative Implicit Video Compression: Ge Gao,

Siyue Teng,

Tianhao Peng,

Fan Zhang,

David Bull; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Ge and Teng, Siyue and Peng, Tianhao and Zhang, Fan and Bull, David},
  title     = {{GIViC}: Generative Implicit Video Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17356--17367},
}
Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework: Jian-Jian Jiang,

Xiao-Ming Wu,

Yi-Xiang He,

Ling-An Zeng,

Yi-Lin Wei,

Dandan Zhang,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Jian-Jian and Wu, Xiao-Ming and He, Yi-Xiang and Zeng, Ling-An and Wei, Yi-Lin and Zhang, Dandan and Zheng, Wei-Shi},
  title     = {Rethinking Bimanual Robotic Manipulation: Learning with Decoupled Interaction Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12427--12437},
}
Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars: Tobias Kirschstein,

Javier Romero,

Artem Sevastopolsky,

Matthias Nießner,

Shunsuke Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kirschstein_2025_ICCV,
  author    = {Kirschstein, Tobias and Romero, Javier and Sevastopolsky, Artem and Nie{\ss}ner, Matthias and Saito, Shunsuke},
  title     = {Avat3r: Large Animatable {Gaussian} Reconstruction Model for High-fidelity {3D} Head Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12089--12100},
}
Democratizing High-Fidelity Co-Speech Gesture Video Generation: Xu Yang,

Shaoli Huang,

Shenbo Xie,

Xuelin Chen,

Yifei Liu,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xu and Huang, Shaoli and Xie, Shenbo and Chen, Xuelin and Liu, Yifei and Ding, Changxing},
  title     = {Democratizing High-Fidelity Co-Speech Gesture Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14283--14292},
}
DiffIP: Representation Fingerprints for Robust IP Protection of Diffusion Models: Zhuoling Li,

Haoxuan Qu,

Jason Kuen,

Jiuxiang Gu,

Qiuhong Ke,

Jun Liu,

Hossein Rahmani; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhuoling and Qu, Haoxuan and Kuen, Jason and Gu, Jiuxiang and Ke, Qiuhong and Liu, Jun and Rahmani, Hossein},
  title     = {{DiffIP}: Representation Fingerprints for Robust {IP} Protection of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17035--17045},
}
All Parts Matter: A Unified Mask-Free Virtual Try-On Framework: Chenghu Du,

Shengwu Xiong,

Yi Rong; [pdf]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Chenghu and Xiong, Shengwu and Rong, Yi},
  title     = {All Parts Matter: A Unified Mask-Free Virtual Try-On Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19525--19534},
}
LOCATEdit: Graph Laplacian Optimized Cross Attention for Localized Text-Guided Image Editing: Achint Soni,

Meet Soni,

Sirisha Rambhatla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soni_2025_ICCV,
  author    = {Soni, Achint and Soni, Meet and Rambhatla, Sirisha},
  title     = {{LOCATEdit}: Graph {Laplacian} Optimized Cross Attention for Localized Text-Guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18804--18814},
}
Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion: Zeyu Wang,

Jizheng Zhang,

Haiyu Song,

Mingyu Ge,

Jiayu Wang,

Haoran Duan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zeyu and Zhang, Jizheng and Song, Haiyu and Ge, Mingyu and Wang, Jiayu and Duan, Haoran},
  title     = {Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12637--12647},
}
You Think, You ACT: The New Task of Arbitrary Text to Motion Generation: Runqi Wang,

Caoyuan Ma,

Guopeng Li,

Hanrui Xu,

Yuke Li,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Runqi and Ma, Caoyuan and Li, Guopeng and Xu, Hanrui and Li, Yuke and Wang, Zheng},
  title     = {You Think, You {ACT}: The New Task of Arbitrary Text to Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12012--12022},
}
HERO: Human Reaction Generation from Videos: Chengjun Yu,

Wei Zhai,

Yuhang Yang,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Chengjun and Zhai, Wei and Yang, Yuhang and Cao, Yang and Zha, Zheng-Jun},
  title     = {{HERO}: Human Reaction Generation from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10262--10274},
}
Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration: Darshan Thaker,

Abhishek Goyal,

Rene Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thaker_2025_ICCV,
  author    = {Thaker, Darshan and Goyal, Abhishek and Vidal, Rene},
  title     = {Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12873--12882},
}
Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search: Shuyu Yang,

Yaxiong Wang,

Li Zhu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Shuyu and Wang, Yaxiong and Zhu, Li and Zheng, Zhedong},
  title     = {Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11720--11730},
}
GigaTok: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation: Tianwei Xiong,

Jun Hao Liew,

Zilong Huang,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_ICCV,
  author    = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Feng, Jiashi and Liu, Xihui},
  title     = {{GigaTok}: Scaling Visual Tokenizers to 3 Billion Parameters for Autoregressive Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18770--18780},
}
FullDiT: Video Generative Foundation Models with Multimodal Control via Full Attention: Xuan Ju,

Weicai Ye,

Quande Liu,

Qiulin Wang,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Kun Gai,

Qiang Xu; [pdf]
[bibtex]
@InProceedings{Ju_2025_ICCV,
  author    = {Ju, Xuan and Ye, Weicai and Liu, Quande and Wang, Qiulin and Wang, Xintao and Wan, Pengfei and Zhang, Di and Gai, Kun and Xu, Qiang},
  title     = {{FullDiT}: Video Generative Foundation Models with Multimodal Control via Full Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15737--15747},
}
VideoAuteur: Towards Long Narrative Video Generation: Junfei Xiao,

Feng Cheng,

Lu Qi,

Liangke Gui,

Yang Zhao,

Shanchuan Lin,

Jiepeng Cen,

Zhibei Ma,

Alan Yuille,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Junfei and Cheng, Feng and Qi, Lu and Gui, Liangke and Zhao, Yang and Lin, Shanchuan and Cen, Jiepeng and Ma, Zhibei and Yuille, Alan and Jiang, Lu},
  title     = {{VideoAuteur}: Towards Long Narrative Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19163--19173},
}
Multi-modal Identity Extraction: Ryan Webster,

Teddy Furon; [pdf] [supp]
[bibtex]
@InProceedings{Webster_2025_ICCV,
  author    = {Webster, Ryan and Furon, Teddy},
  title     = {Multi-modal Identity Extraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10797--10806},
}
Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection: Yingsong Huang,

Hui Guo,

Jing Huang,

Bing Bai,

Qi Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yingsong and Guo, Hui and Huang, Jing and Bai, Bing and Xiong, Qi},
  title     = {Diffusion Epistemic Uncertainty with Asymmetric Learning for Diffusion-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17097--17107},
}
Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models: Xuran Ma,

Yexin Liu,

Yaofu Liu,

Xianfeng Wu,

Mingzhe Zheng,

Zihao Wang,

Ser-Nam Lim,

Harry Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Xuran and Liu, Yexin and Liu, Yaofu and Wu, Xianfeng and Zheng, Mingzhe and Wang, Zihao and Lim, Ser-Nam and Yang, Harry},
  title     = {Model Reveals What to Cache: Profiling-Based Feature Reuse for Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17150--17159},
}
Diffusion-Based Imaginative Coordination for Bimanual Manipulation: Huilin Xu,

Jian Ding,

Jiakun Xu,

Ruixiang Wang,

Jun Chen,

Jinjie Mai,

Yanwei Fu,

Bernard Ghanem,

Feng Xu,

Mohamed Elhoseiny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Huilin and Ding, Jian and Xu, Jiakun and Wang, Ruixiang and Chen, Jun and Mai, Jinjie and Fu, Yanwei and Ghanem, Bernard and Xu, Feng and Elhoseiny, Mohamed},
  title     = {Diffusion-Based Imaginative Coordination for Bimanual Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11469--11479},
}
Animate Anyone 2: High-Fidelity Character Image Animation with Environment Affordance: Li Hu,

Guangyuan Wang,

Zhen Shen,

Xin Gao,

Dechao Meng,

Lian Zhuo,

Peng Zhang,

Bang Zhang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Li and Wang, Guangyuan and Shen, Zhen and Gao, Xin and Meng, Dechao and Zhuo, Lian and Zhang, Peng and Zhang, Bang and Bo, Liefeng},
  title     = {Animate Anyone 2: High-Fidelity Character Image Animation with Environment Affordance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10207--10217},
}
LongAnimation: Long Animation Generation with Dynamic Global-Local Memory: Nan Chen,

Mengqi Huang,

Yihao Meng,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Nan and Huang, Mengqi and Meng, Yihao and Mao, Zhendong},
  title     = {{LongAnimation}: Long Animation Generation with Dynamic Global-Local Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10032--10042},
}
UniversalBooth: Model-Agnostic Personalized Text-to-Image Generation: Songhua Liu,

Ruonan Yu,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Songhua and Yu, Ruonan and Wang, Xinchao},
  title     = {{UniversalBooth}: Model-Agnostic Personalized Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18314--18324},
}
EEGMirror: Leveraging EEG Data in the Wild via Montage-Agnostic Self-Supervision for EEG to Video Decoding: Xuan-Hao Liu,

Bao-Liang Lu,

Wei-Long Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xuan-Hao and Lu, Bao-Liang and Zheng, Wei-Long},
  title     = {{EEGMirror}: Leveraging {EEG} Data in the Wild via Montage-Agnostic Self-Supervision for {EEG} to Video Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18273--18283},
}
Edit360: 2D Image Edits to 3D Assets from Any Angle: Junchao Huang,

Xinting Hu,

Shaoshuai Shi,

Zhuotao Tian,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junchao and Hu, Xinting and Shi, Shaoshuai and Tian, Zhuotao and Jiang, Li},
  title     = {Edit360: {2D} Image Edits to {3D} Assets from Any Angle},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16618--16628},
}
AV-Link: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation: Moayed Haji-Ali,

Willi Menapace,

Aliaksandr Siarohin,

Ivan Skorokhodov,

Alper Canberk,

Kwot Sin Lee,

Vicente Ordonez,

Sergey Tulyakov; [pdf] [supp]
[bibtex]
@InProceedings{Haji-Ali_2025_ICCV,
  author    = {Haji-Ali, Moayed and Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Canberk, Alper and Lee, Kwot Sin and Ordonez, Vicente and Tulyakov, Sergey},
  title     = {{AV-Link}: Temporally-Aligned Diffusion Features for Cross-Modal Audio-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19373--19385},
}
ADCD-Net: Robust Document Image Forgery Localization via Adaptive DCT Feature and Hierarchical Content Disentanglement: Kahim Wong,

Jicheng Zhou,

Haiwei Wu,

Yain-Whar Si,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Kahim and Zhou, Jicheng and Wu, Haiwei and Si, Yain-Whar and Zhou, Jiantao},
  title     = {{ADCD-Net}: Robust Document Image Forgery Localization via Adaptive {DCT} Feature and Hierarchical Content Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19280--19289},
}
STaR: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints: Xiaohang Yang,

Qing Wang,

Jiahao Yang,

Gregory Slabaugh,

Shanxin Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xiaohang and Wang, Qing and Yang, Jiahao and Slabaugh, Gregory and Yuan, Shanxin},
  title     = {{STaR}: Seamless Spatial-Temporal Aware Motion Retargeting with Penetration and Consistency Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12947--12955},
}
One-Step Specular Highlight Removal with Adapted Diffusion Models: Mahir Atmis,

Levent Karacan,

Mehmet Sarıgül; [pdf] [supp]
[bibtex]
@InProceedings{Atmis_2025_ICCV,
  author    = {Atmis, Mahir and Karacan, Levent and Sar{\i}g{\"u}l, Mehmet},
  title     = {One-Step Specular Highlight Removal with Adapted Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16313--16322},
}
GraspCoT: Integrating Physical Property Reasoning for 6-DoF Grasping under Flexible Language Instructions: Xiaomeng Chu,

Jiajun Deng,

Guoliang You,

Wei Liu,

Xingchen Li,

Jianmin Ji,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Liu, Wei and Li, Xingchen and Ji, Jianmin and Zhang, Yanyong},
  title     = {{GraspCoT}: Integrating Physical Property Reasoning for {6-DoF} Grasping under Flexible Language Instructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10130--10140},
}
Uncover Treasures in DCT: Advancing JPEG Quality Enhancement by Exploiting Latent Correlations: Jing Yang,

Qunliang Xing,

Mai Xu,

Minglang Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Jing and Xing, Qunliang and Xu, Mai and Qiao, Minglang},
  title     = {Uncover Treasures in {DCT}: Advancing {JPEG} Quality Enhancement by Exploiting Latent Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17598--17607},
}
ICE-Bench: A Unified and Comprehensive Benchmark for Image Creating and Editing: Yulin Pan,

Xiangteng He,

Chaojie Mao,

Zhen Han,

Zeyinzi Jiang,

Jingfeng Zhang,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Yulin and He, Xiangteng and Mao, Chaojie and Han, Zhen and Jiang, Zeyinzi and Zhang, Jingfeng and Liu, Yu},
  title     = {{ICE-Bench}: A Unified and Comprehensive Benchmark for Image Creating and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16586--16596},
}
Unified Adversarial Augmentation for Improving Palmprint Recognition: Jianlong Jin,

Chenglong Zhao,

Ruixin Zhang,

Sheng Shang,

Yang Zhao,

Jun Wang,

Jingyun Zhang,

Shouhong Ding,

Wei Jia,

Yunsheng Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Zhao, Yang and Wang, Jun and Zhang, Jingyun and Ding, Shouhong and Jia, Wei and Wu, Yunsheng},
  title     = {Unified Adversarial Augmentation for Improving Palmprint Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14141--14151},
}
PlugMark: A Plug-in Zero-Watermarking Framework for Diffusion Models: Pengzhen Chen,

Yanwei Liu,

Xiaoyan Gu,

Enci Liu,

Zhuoyi Shang,

Xiangyang Ji,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Liu, Enci and Shang, Zhuoyi and Ji, Xiangyang and Liu, Wu},
  title     = {{PlugMark}: A Plug-in Zero-Watermarking Framework for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17335--17345},
}
Human-Object Interaction from Human-Level Instructions: Zhen Wu,

Jiaman Li,

Pei Xu,

C. Karen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Zhen and Li, Jiaman and Xu, Pei and Liu, C. Karen},
  title     = {Human-Object Interaction from Human-Level Instructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11176--11186},
}
SAM2Long: Enhancing SAM 2 for Long Video Segmentation with a Training-Free Memory Tree: Shuangrui Ding,

Rui Qian,

Xiaoyi Dong,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Yuwei Guo,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Shuangrui and Qian, Rui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Guo, Yuwei and Lin, Dahua and Wang, Jiaqi},
  title     = {{SAM2Long}: Enhancing {SAM} 2 for Long Video Segmentation with a Training-Free Memory Tree},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13614--13624},
}
X2I: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation: Jian Ma,

Qirong Peng,

Xu Guo,

Chen Chen,

Haonan Lu,

Zhenyu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Jian and Peng, Qirong and Guo, Xu and Chen, Chen and Lu, Haonan and Yang, Zhenyu},
  title     = {{X2I}: Seamless Integration of Multimodal Understanding into Diffusion Transformer via Attention Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16733--16744},
}
Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation: Fa-Ting Hong,

Zunnan Xu,

Zixiang Zhou,

Jun Zhou,

Xiu Li,

Qin Lin,

Qinglin Lu,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Fa-Ting and Xu, Zunnan and Zhou, Zixiang and Zhou, Jun and Li, Xiu and Lin, Qin and Lu, Qinglin and Xu, Dan},
  title     = {Audio-visual Controlled Video Diffusion with Masked Selective State Spaces Modeling for Natural Talking Head Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12549--12558},
}
SILO: Solving Inverse Problems with Latent Operators: Ron Raphaeli,

Sean Man,

Michael Elad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raphaeli_2025_ICCV,
  author    = {Raphaeli, Ron and Man, Sean and Elad, Michael},
  title     = {{SILO}: Solving Inverse Problems with Latent Operators},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10570--10580},
}
Turbo2K: Towards Ultra-Efficient and High-Quality 2K Video Synthesis: Jingjing Ren,

Wenbo Li,

Zhongdao Wang,

Haoze Sun,

Bangzhen Liu,

Haoyu Chen,

Jiaqi Xu,

Aoxue Li,

Shifeng Zhang,

Bin Shao,

Yong Guo,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Jingjing and Li, Wenbo and Wang, Zhongdao and Sun, Haoze and Liu, Bangzhen and Chen, Haoyu and Xu, Jiaqi and Li, Aoxue and Zhang, Shifeng and Shao, Bin and Guo, Yong and Zhu, Lei},
  title     = {{Turbo2K}: Towards Ultra-Efficient and High-Quality {2K} Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18155--18165},
}
Puppet-Master: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics: Ruining Li,

Chuanxia Zheng,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruining and Zheng, Chuanxia and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{Puppet-Master}: Scaling Interactive Video Generation as a Motion Prior for Part-Level Dynamics},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13405--13415},
}
FlowChef: Steering of Rectified Flow Models for Controlled Generations: Maitreya Patel,

Song Wen,

Dimitris N. Metaxas,

Yezhou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Patel_2025_ICCV,
  author    = {Patel, Maitreya and Wen, Song and Metaxas, Dimitris N. and Yang, Yezhou},
  title     = {{FlowChef}: Steering of Rectified Flow Models for Controlled Generations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15308--15318},
}
Adaptive Caching for Faster Video Generation with Diffusion Transformers: Kumara Kahatapitiya,

Haozhe Liu,

Sen He,

Ding Liu,

Menglin Jia,

Chenyang Zhang,

Michael S. Ryoo,

Tian Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kahatapitiya_2025_ICCV,
  author    = {Kahatapitiya, Kumara and Liu, Haozhe and He, Sen and Liu, Ding and Jia, Menglin and Zhang, Chenyang and Ryoo, Michael S. and Xie, Tian},
  title     = {Adaptive Caching for Faster Video Generation with Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15240--15252},
}
EgoMusic-driven Human Dance Motion Estimation with Skeleton Mamba: Quang Nguyen,

Nhat Le,

Baoru Huang,

Minh Nhat Vu,

Chengcheng Tang,

Van Nguyen,

Ngan Le,

Thieu Vo,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Quang and Le, Nhat and Huang, Baoru and Vu, Minh Nhat and Tang, Chengcheng and Nguyen, Van and Le, Ngan and Vo, Thieu and Nguyen, Anh},
  title     = {{EgoMusic-driven} Human Dance Motion Estimation with Skeleton {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12023--12033},
}
OmniCache: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models: Huanpeng Chu,

Wei Wu,

Guanyu Feng,

Yutao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Huanpeng and Wu, Wei and Feng, Guanyu and Zhang, Yutao},
  title     = {{OmniCache}: A Trajectory-Oriented Global Perspective on Training-Free Cache Reuse for Diffusion Transformer Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16302--16312},
}
DreamFuse: Adaptive Image Fusion with Diffusion Transformer: Junjia Huang,

Pengxiang Yan,

Jiyang Liu,

Jie Wu,

Zhao Wang,

Yitong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Junjia and Yan, Pengxiang and Liu, Jiyang and Wu, Jie and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamFuse}: Adaptive Image Fusion with Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17292--17301},
}
Free2Guide: Training-Free Text-to-Video Alignment using Image LVLM: Jaemin Kim,

Bryan Sangwoo Kim,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jaemin and Kim, Bryan Sangwoo and Ye, Jong Chul},
  title     = {{Free2Guide}: Training-Free Text-to-Video Alignment using Image {LVLM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17920--17929},
}
T2I-Copilot: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation: Chieh-Yun Chen,

Min Shi,

Gong Zhang,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chieh-Yun and Shi, Min and Zhang, Gong and Shi, Humphrey},
  title     = {{T2I-Copilot}: A Training-Free Multi-Agent Text-to-Image System for Enhanced Prompt Interpretation and Interactive Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19396--19405},
}
LightBSR: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning: Jiang Yuan,

Ji Ma,

Bo Wang,

Guanzhou Ke,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Jiang and Ma, Ji and Wang, Bo and Ke, Guanzhou and Hu, Weiming},
  title     = {{LightBSR}: Towards Lightweight Blind Super-Resolution via Discriminative Implicit Degradation Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11927--11936},
}
LOMM: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation: Seunghun Lee,

Jiwan Seo,

Minwoo Choi,

Kiljoon Han,

Jahoon Jeong,

Zane Durante,

Ehsan Adeli,

Sang Hyun Park,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Seunghun and Seo, Jiwan and Choi, Minwoo and Han, Kiljoon and Jeong, Jahoon and Durante, Zane and Adeli, Ehsan and Park, Sang Hyun and Im, Sunghoon},
  title     = {{LOMM}: Latest Object Memory Management for Temporally Consistent Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13719--13729},
}
LUT-Fuse: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables: Xunpeng Yi,

Yibing Zhang,

Xinyu Xiang,

Qinglong Yan,

Han Xu,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Xunpeng and Zhang, Yibing and Xiang, Xinyu and Yan, Qinglong and Xu, Han and Ma, Jiayi},
  title     = {{LUT-Fuse}: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14559--14568},
}
DNF-Intrinsic: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering: Rongjia Zheng,

Qing Zhang,

Chengjiang Long,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Rongjia and Zhang, Qing and Long, Chengjiang and Zheng, Wei-Shi},
  title     = {{DNF-Intrinsic}: Deterministic Noise-Free Diffusion for Indoor Inverse Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10342--10352},
}
TextMaster: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control: Zhenyu Yan,

Jian Wang,

Aoqiang Wang,

Yuhan Li,

Wenxiang Shang,

Zhu Hangcheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Zhenyu and Wang, Jian and Wang, Aoqiang and Li, Yuhan and Shang, Wenxiang and Hangcheng, Zhu},
  title     = {{TextMaster}: A Unified Framework for Realistic Text Editing via Glyph-Style Dual-Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16112--16121},
}
Unraveling the Smoothness Properties of Diffusion Models: A Gaussian Mixture Perspective: Yingyu Liang,

Zhizhou Sha,

Zhenmei Shi,

Zhao Song,

Mingda Wan,

Yufa Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yingyu and Sha, Zhizhou and Shi, Zhenmei and Song, Zhao and Wan, Mingda and Zhou, Yufa},
  title     = {Unraveling the Smoothness Properties of Diffusion Models: A {Gaussian} Mixture Perspective},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11436--11446},
}
Scendi Score: Prompt-Aware Diversity Evaluation via Schur Complement of CLIP Embeddings: Azim Ospanov,

Mohammad Jalali,

Farzan Farnia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ospanov_2025_ICCV,
  author    = {Ospanov, Azim and Jalali, Mohammad and Farnia, Farzan},
  title     = {{Scendi Score}: Prompt-Aware Diversity Evaluation via {Schur} Complement of {CLIP} Embeddings},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {16927--16937},
}
Context-Aware Academic Emotion Dataset and Benchmark: Luming Zhao,

Jingwen Xuan,

Jiamin Lou,

Yonghui Yu,

Wenwu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Luming and Xuan, Jingwen and Lou, Jiamin and Yu, Yonghui and Yang, Wenwu},
  title     = {Context-Aware Academic Emotion Dataset and Benchmark},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13859--13868},
}
Contrastive Test-Time Composition of Multiple LoRA Models for Image Generation: Tuna Han Salih Meral,

Enis Simsar,

Federico Tombari,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meral_2025_ICCV,
  author    = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar},
  title     = {Contrastive Test-Time Composition of Multiple {LoRA} Models for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18090--18100},
}
Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection: Juan Hu,

Shaojing Fan,

Terence Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Juan and Fan, Shaojing and Sim, Terence},
  title     = {Seeing Through Deepfakes: A Human-Inspired Framework for Multi-Face Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14517--14527},
}
Distilling Parallel Gradients for Fast ODE Solvers of Diffusion Models: Beier Zhu,

Ruoyu Wang,

Tong Zhao,

Hanwang Zhang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Beier and Wang, Ruoyu and Zhao, Tong and Zhang, Hanwang and Zhang, Chi},
  title     = {Distilling Parallel Gradients for Fast {ODE} Solvers of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19557--19566},
}
PUMA: Empowering Unified MLLM with Multi-granular Visual Generation: Rongyao Fang,

Chengqi Duan,

Kun Wang,

Hao Li,

Linjiang Huang,

Hao Tian,

Xingyu Zeng,

Rui Zhao,

Jifeng Dai,

Hongsheng Li,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Rongyao and Duan, Chengqi and Wang, Kun and Li, Hao and Huang, Linjiang and Tian, Hao and Zeng, Xingyu and Zhao, Rui and Dai, Jifeng and Li, Hongsheng and Liu, Xihui},
  title     = {{PUMA}: Empowering Unified {MLLM} with Multi-granular Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15447--15457},
}
Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models: Hyungjin Kim,

Seokho Ahn,

Young-Duk Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk},
  title     = {Draw Your Mind: Personalized Generation via Condition-Level Modeling in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17171--17180},
}
CopyrightShield: Enhancing Diffusion Model Security Against Copyright Infringement Attacks: Zhixiang Guo,

Siyuan Liang,

Aishan Liu,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Zhixiang and Liang, Siyuan and Liu, Aishan and Tao, Dacheng},
  title     = {{CopyrightShield}: Enhancing Diffusion Model Security Against Copyright Infringement Attacks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19417--19426},
}
PixTalk: Controlling Photorealistic Image Processing and Editing with Language: Marcos V. Conde,

Zihao Lu,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Conde_2025_ICCV,
  author    = {Conde, Marcos V. and Lu, Zihao and Timofte, Radu},
  title     = {{PixTalk}: Controlling Photorealistic Image Processing and Editing with Language},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19269--19279},
}
Learning Streaming Video Representation via Multitask Training: Yibin Yan,

Jilan Xu,

Shangzhe Di,

Yikun Liu,

Yudi Shi,

Qirui Chen,

Zeqian Li,

Yifei Huang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Yibin and Xu, Jilan and Di, Shangzhe and Liu, Yikun and Shi, Yudi and Chen, Qirui and Li, Zeqian and Huang, Yifei and Xie, Weidi},
  title     = {Learning Streaming Video Representation via Multitask Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {9900--9912},
}
Pose-Star: Anatomy-Aware Editing for Open-World Fashion Images: Yuran Dong,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Yuran and Ye, Mang},
  title     = {{Pose-Star}: Anatomy-Aware Editing for Open-World Fashion Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15822--15831},
}
Sliced Wasserstein Bridge for Open-Vocabulary Video Instance Segmentation: Zheyun Qin,

Deng Yu,

Chuanchen Luo,

Zhumin Chen; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Zheyun and Yu, Deng and Luo, Chuanchen and Chen, Zhumin},
  title     = {Sliced {Wasserstein} Bridge for Open-Vocabulary Video Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {12470--12478},
}
Riemannian-Geometric Fingerprints of Generative Models: Hae Jin Song,

Laurent Itti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Hae Jin and Itti, Laurent},
  title     = {{Riemannian}-Geometric Fingerprints of Generative Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11425--11435},
}
EasyControl: Adding Efficient and Flexible Control for Diffusion Transformer: Yuxuan Zhang,

Yirui Yuan,

Yiren Song,

Haofan Wang,

Jiaming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuxuan and Yuan, Yirui and Song, Yiren and Wang, Haofan and Liu, Jiaming},
  title     = {{EasyControl}: Adding Efficient and Flexible Control for Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19513--19524},
}
I2VControl: Disentangled and Unified Video Motion Synthesis Control: Wanquan Feng,

Tianhao Qi,

Jiawei Liu,

Mingzhen Sun,

Pengqi Tu,

Tianxiang Ma,

Fei Dai,

Songtao Zhao,

Siyu Zhou,

Qian He; [pdf] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Wanquan and Qi, Tianhao and Liu, Jiawei and Sun, Mingzhen and Tu, Pengqi and Ma, Tianxiang and Dai, Fei and Zhao, Songtao and Zhou, Siyu and He, Qian},
  title     = {{I2VControl}: Disentangled and Unified Video Motion Synthesis Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14051--14060},
}
Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens: Dongwon Kim,

Ju He,

Qihang Yu,

Chenglin Yang,

Xiaohui Shen,

Suha Kwak,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Dongwon and He, Ju and Yu, Qihang and Yang, Chenglin and Shen, Xiaohui and Kwak, Suha and Chen, Liang-Chieh},
  title     = {Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18442--18452},
}
Aligning Global Semantics and Local Textures in Generative Video Enhancement: Zhikai Chen,

Fuchen Long,

Zhaofan Qiu,

Ting Yao,

Wengang Zhou,

Jiebo Luo,

Tao Mei; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao},
  title     = {Aligning Global Semantics and Local Textures in Generative Video Enhancement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17087--17096},
}
Reverse Convolution and Its Applications to Image Restoration: Xuhong Huang,

Shiqi Liu,

Kai Zhang,

Ying Tai,

Jian Yang,

Hui Zeng,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Xuhong and Liu, Shiqi and Zhang, Kai and Tai, Ying and Yang, Jian and Zeng, Hui and Zhang, Lei},
  title     = {Reverse Convolution and Its Applications to Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10507--10516},
}
CreatiLayout: Siamese Multimodal Diffusion Transformer for Creative Layout-to-Image Generation: Hui Zhang,

Dexiang Hong,

Yitong Wang,

Jie Shao,

Xinglong Wu,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hui and Hong, Dexiang and Wang, Yitong and Shao, Jie and Wu, Xinglong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{CreatiLayout}: {Siamese} Multimodal Diffusion Transformer for Creative Layout-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18487--18497},
}
DyWA: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation: Jiangran Lyu,

Ziming Li,

Xuesong Shi,

Chaoyi Xu,

Yizhou Wang,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Jiangran and Li, Ziming and Shi, Xuesong and Xu, Chaoyi and Wang, Yizhou and Wang, He},
  title     = {{DyWA}: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11058--11068},
}
Flow Stochastic Segmentation Networks: Fabio De Sousa Ribeiro,

Omar Todd,

Charles Jones,

Avinash Kori,

Raghav Mehta,

Ben Glocker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{De_Sousa_Ribeiro_2025_ICCV,
  author    = {De Sousa Ribeiro, Fabio and Todd, Omar and Jones, Charles and Kori, Avinash and Mehta, Raghav and Glocker, Ben},
  title     = {Flow Stochastic Segmentation Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14754--14765},
}
USP: Unified Self-Supervised Pretraining for Image Generation and Understanding: Xiangxiang Chu,

Renda Li,

Yong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2025_ICCV,
  author    = {Chu, Xiangxiang and Li, Renda and Wang, Yong},
  title     = {{USP}: Unified Self-Supervised Pretraining for Image Generation and Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {18475--18486},
}
Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data: Ke Fan,

Shunlin Lu,

Minyue Dai,

Runyi Yu,

Lixing Xiao,

Zhiyang Dou,

Junting Dong,

Lizhuang Ma,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Ke and Lu, Shunlin and Dai, Minyue and Yu, Runyi and Xiao, Lixing and Dou, Zhiyang and Dong, Junting and Ma, Lizhuang and Wang, Jingbo},
  title     = {Go to Zero: Towards Zero-shot Motion Generation with Million-scale Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13336--13348},
}
Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on fMRI: Haodong Jing,

Dongyao Jiang,

Yongqiang Ma,

Haibo Hua,

Bo Huang,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Jing_2025_ICCV,
  author    = {Jing, Haodong and Jiang, Dongyao and Ma, Yongqiang and Hua, Haibo and Huang, Bo and Zheng, Nanning},
  title     = {Beyond Brain Decoding: Visual-Semantic Reconstructions to Mental Creation Extension Based on {fMRI}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19258--19268},
}
DAViD: Modeling Dynamic Affordance of 3D Objects Using Pre-trained Video Diffusion Models: Hyeonwoo Kim,

Sangwon Baik,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Hyeonwoo and Baik, Sangwon and Joo, Hanbyul},
  title     = {{DAViD}: Modeling Dynamic Affordance of {3D} Objects Using Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10330--10341},
}
SKALD: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation: Chen-Yi Lu,

Md Mehrab Tanjim,

Ishita Dasgupta,

Somdeb Sarkhel,

Gang Wu,

Saayan Mitra,

Somali Chaterji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chen-Yi and Tanjim, Md Mehrab and Dasgupta, Ishita and Sarkhel, Somdeb and Wu, Gang and Mitra, Saayan and Chaterji, Somali},
  title     = {{SKALD}: Learning-Based Shot Assembly for Coherent Multi-Shot Video Creation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17859--17868},
}
Preacher: Paper-to-Video Agentic System: Jingwei Liu,

Ling Yang,

Hao Luo,

Fan Wang,

Hongyan Li,

Mengdi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jingwei and Yang, Ling and Luo, Hao and Wang, Fan and Li, Hongyan and Wang, Mengdi},
  title     = {Preacher: Paper-to-Video Agentic System},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17129--17139},
}
NeuralSVG: An Implicit Representation for Text-to-Vector Generation: Sagi Polaczek,

Yuval Alaluf,

Elad Richardson,

Yael Vinker,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Polaczek_2025_ICCV,
  author    = {Polaczek, Sagi and Alaluf, Yuval and Richardson, Elad and Vinker, Yael and Cohen-Or, Daniel},
  title     = {{NeuralSVG}: An Implicit Representation for Text-to-Vector Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15458--15468},
}
Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions: Ziyi Dong,

Chengxing Zhou,

Weijian Deng,

Pengxu Wei,

Xiangyang Ji,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Ziyi and Zhou, Chengxing and Deng, Weijian and Wei, Pengxu and Ji, Xiangyang and Lin, Liang},
  title     = {Can We Achieve Efficient Diffusion Without Self-Attention? Distilling Self-Attention into Convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {17401--17410},
}
SIMS: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation: Wenjia Wang,

Liang Pan,

Zhiyang Dou,

Jidong Mei,

Zhouyingcheng Liao,

Yuke Lou,

Yifan Wu,

Lei Yang,

Jingbo Wang,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Wenjia and Pan, Liang and Dou, Zhiyang and Mei, Jidong and Liao, Zhouyingcheng and Lou, Yuke and Wu, Yifan and Yang, Lei and Wang, Jingbo and Komura, Taku},
  title     = {{SIMS}: Simulating Stylized Human-Scene Interactions with Retrieval-Augmented Script Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {14117--14127},
}
DuoCLR: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation: Haitao Tian; [pdf] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Haitao},
  title     = {{DuoCLR}: Dual-Surrogate Contrastive Learning for Skeleton-based Human Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {13772--13782},
}
SAM Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures: Yi Qin,

Rui Wang,

Tao Huang,

Tong Xiao,

Liping Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yi and Wang, Rui and Huang, Tao and Xiao, Tong and Jing, Liping},
  title     = {{SAM} Encoder Breach by Adversarial Simplicial Complex Triggers Downstream Model Failures},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10624--10634},
}
Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection: Yichen Lu,

Siwei Nie,

Minlong Lu,

Xudong Yang,

Xiaobo Zhang,

Peng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yichen and Nie, Siwei and Lu, Minlong and Yang, Xudong and Zhang, Xiaobo and Zhang, Peng},
  title     = {Tracing Copied Pixels and Regularizing Patch Affinity in Copy Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19248--19257},
}
Reflect-DiT: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection: Shufan Li,

Konstantinos Kallidromitis,

Akash Gokul,

Arsh Koneru,

Yusuke Kato,

Kazuki Kozuka,

Aditya Grover; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Koneru, Arsh and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya},
  title     = {{Reflect-DiT}: Inference-Time Scaling for Text-to-Image Diffusion Transformers via In-Context Reflection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15657--15668},
}
HAMoBE: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person ReID: Yiyang Su,

Yunping Shi,

Feng Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Yiyang and Shi, Yunping and Liu, Feng and Liu, Xiaoming},
  title     = {{HAMoBE}: Hierarchical and Adaptive Mixture of Biometric Experts for Video-based Person {ReID}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {11525--11536},
}
Neighboring Autoregressive Modeling for Efficient Visual Generation: Yefei He,

Yuanyu He,

Shaoxuan He,

Feng Chen,

Hong Zhou,

Kaipeng Zhang,

Bohan Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Yefei and He, Yuanyu and He, Shaoxuan and Chen, Feng and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan},
  title     = {Neighboring Autoregressive Modeling for Efficient Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19000--19010},
}
MagicColor: Multi-Instance Sketch Colorization: Yinhan Zhang,

Yue Ma,

Bingyuan Wang,

Qifeng Chen,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yinhan and Ma, Yue and Wang, Bingyuan and Chen, Qifeng and Wang, Zeyu},
  title     = {{MagicColor}: Multi-Instance Sketch Colorization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {15205--15217},
}
Im2Haircut: Single-view Strand-based Hair Reconstruction for Human Avatars: Vanessa Sklyarova,

Egor Zakharov,

Malte Prinzler,

Giorgio Becherini,

Michael J. Black,

Justus Thies; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sklyarova_2025_ICCV,
  author    = {Sklyarova, Vanessa and Zakharov, Egor and Prinzler, Malte and Becherini, Giorgio and Black, Michael J. and Thies, Justus},
  title     = {{Im2Haircut}: Single-view Strand-based Hair Reconstruction for Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {10656--10665},
}
Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces: Aniruddha Mahapatra,

Long Mai,

David Bourgin,

Yitian Zhang,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mahapatra_2025_ICCV,
  author    = {Mahapatra, Aniruddha and Mai, Long and Bourgin, David and Zhang, Yitian and Liu, Feng},
  title     = {Progressive Growing of Video Tokenizers for Temporally Compact Latent Spaces},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17629--17639},
}
DreamRenderer: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models: Dewei Zhou,

Mingwei Li,

Zongxin Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Dewei and Li, Mingwei and Yang, Zongxin and Yang, Yi},
  title     = {{DreamRenderer}: Taming Multi-Instance Attribute Control in Large-Scale Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16712--16722},
}
TurboVSR: Fantastic Video Upscalers and Where to Find Them: Zhongdao Wang,

Guodongfang Zhao,

Jingjing Ren,

Bailan Feng,

Shifeng Zhang,

Wenbo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhongdao and Zhao, Guodongfang and Ren, Jingjing and Feng, Bailan and Zhang, Shifeng and Li, Wenbo},
  title     = {{TurboVSR}: Fantastic Video Upscalers and Where to Find Them},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18132--18142},
}
AffordDexGrasp: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance: Yi-Lin Wei,

Mu Lin,

Yuhao Lin,

Jian-Jian Jiang,

Xiao-Ming Wu,

Ling-An Zeng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yi-Lin and Lin, Mu and Lin, Yuhao and Jiang, Jian-Jian and Wu, Xiao-Ming and Zeng, Ling-An and Zheng, Wei-Shi},
  title     = {{AffordDexGrasp}: Open-set Language-guided Dexterous Grasp with Generalizable-Instructive Affordance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11818--11828},
}
NeurOp-Diff: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion: Zihao Xu,

Yuzhi Tang,

Bowen Xu,

Qingquan Li; [pdf]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zihao and Tang, Yuzhi and Xu, Bowen and Li, Qingquan},
  title     = {{NeurOp-Diff}: Continuous Remote Sensing Image Super-Resolution via Neural Operator Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12491--12501},
}
JailbreakDiffBench: A Comprehensive Benchmark for Jailbreaking Diffusion Models: Xiaolong Jin,

Zixuan Weng,

Hanxi Guo,

Chenlong Yin,

Siyuan Cheng,

Guangyu Shen,

Xiangyu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Xiaolong and Weng, Zixuan and Guo, Hanxi and Yin, Chenlong and Cheng, Siyuan and Shen, Guangyu and Zhang, Xiangyu},
  title     = {{JailbreakDiffBench}: A Comprehensive Benchmark for Jailbreaking Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16461--16471},
}
InfiniteYou: Flexible Photo Recrafting While Preserving Your Identity: Liming Jiang,

Qing Yan,

Yumin Jia,

Zichuan Liu,

Hao Kang,

Xin Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Liming and Yan, Qing and Jia, Yumin and Liu, Zichuan and Kang, Hao and Lu, Xin},
  title     = {{InfiniteYou}: Flexible Photo Recrafting While Preserving Your Identity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10898--10907},
}
Generative Adversarial Diffusion: U-Chae Jun,

Jaeeun Ko,

Jiwoo Kang; [pdf] [supp]
[bibtex]
@InProceedings{Jun_2025_ICCV,
  author    = {Jun, U-Chae and Ko, Jaeeun and Kang, Jiwoo},
  title     = {Generative Adversarial Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16786--16796},
}
Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models: Xudong Li,

Zihao Huang,

Yan Zhang,

Yunhang Shen,

Ke Li,

Xiawu Zheng,

Liujuan Cao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xudong and Huang, Zihao and Zhang, Yan and Shen, Yunhang and Li, Ke and Zheng, Xiawu and Cao, Liujuan and Ji, Rongrong},
  title     = {Few-Shot Image Quality Assessment via Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10442--10452},
}
UIP2P: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint: Enis Simsar,

Alessio Tonioni,

Yongqin Xian,

Thomas Hofmann,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simsar_2025_ICCV,
  author    = {Simsar, Enis and Tonioni, Alessio and Xian, Yongqin and Hofmann, Thomas and Tombari, Federico},
  title     = {{UIP2P}: Unsupervised Instruction-based Image Editing via Edit Reversibility Constraint},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18895--18905},
}
Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection: Taehoon Kim,

Jongwook Choi,

Yonghyun Jeong,

Haeun Noh,

Jaejun Yoo,

Seungryul Baek,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Taehoon and Choi, Jongwook and Jeong, Yonghyun and Noh, Haeun and Yoo, Jaejun and Baek, Seungryul and Choi, Jongwon},
  title     = {Beyond Spatial Frequency: Pixel-wise Temporal Frequency-based Deepfake Video Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11198--11207},
}
TemCoCo: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration: Meiqi Gong,

Hao Zhang,

Xunpeng Yi,

Linfeng Tang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Meiqi and Zhang, Hao and Yi, Xunpeng and Tang, Linfeng and Ma, Jiayi},
  title     = {{TemCoCo}: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14326--14335},
}
Fine-Tuning Visual Autogressive Models for Subject-Driven Generation: Jiwoo Chung,

Sangeek Hyun,

Hyunjun Kim,

Eunseo Koh,

MinKyu Lee,

Jae-Pil Heo; [pdf] [supp]
[bibtex]
@InProceedings{Chung_2025_ICCV,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Kim, Hyunjun and Koh, Eunseo and Lee, MinKyu and Heo, Jae-Pil},
  title     = {Fine-Tuning Visual Autogressive Models for Subject-Driven Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19174--19184},
}
Accelerating Diffusion Transformer via Gradient-Optimized Cache: Junxiang Qiu,

Lin Liu,

Shuo Wang,

Jinda Lu,

Kezhou Chen,

Yanbin Hao; [pdf] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Junxiang and Liu, Lin and Wang, Shuo and Lu, Jinda and Chen, Kezhou and Hao, Yanbin},
  title     = {Accelerating Diffusion Transformer via Gradient-Optimized Cache},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17608--17617},
}
Face Retouching with Diffusion Data Generation and Spectral Restorement: Zhidan Xu,

Xiaoqin Zhang,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhidan and Zhang, Xiaoqin and Lu, Shijian},
  title     = {Face Retouching with Diffusion Data Generation and Spectral Restorement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14722--14731},
}
Beyond Blur: A Fluid Perspective on Generative Diffusion Models: Grzegorz Gruszczynski,

Jakub Meixner,

Michal Wlodarczyk,

Przemyslaw Musialski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gruszczynski_2025_ICCV,
  author    = {Gruszczynski, Grzegorz and Meixner, Jakub and Wlodarczyk, Michal and Musialski, Przemyslaw},
  title     = {Beyond Blur: A Fluid Perspective on Generative Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17818--17827},
}
CanonSwap: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation: Xiangyang Luo,

Ye Zhu,

Yunfei Liu,

Lijian Lin,

Cong Wan,

Zijian Cai,

Yu Li,

Shao-Lun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Xiangyang and Zhu, Ye and Liu, Yunfei and Lin, Lijian and Wan, Cong and Cai, Zijian and Li, Yu and Huang, Shao-Lun},
  title     = {{CanonSwap}: High-Fidelity and Consistent Video Face Swapping via Canonical Space Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10064--10074},
}
Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions: Liang Xu,

Chengqun Yang,

Zili Lin,

Fei Xu,

Yifan Liu,

Congsheng Xu,

Yiyi Zhang,

Jie Qin,

Xingdong Sheng,

Yunhui Liu,

Xin Jin,

Yichao Yan,

Wenjun Zeng,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Liang and Yang, Chengqun and Lin, Zili and Xu, Fei and Liu, Yifan and Xu, Congsheng and Zhang, Yiyi and Qin, Jie and Sheng, Xingdong and Liu, Yunhui and Jin, Xin and Yan, Yichao and Zeng, Wenjun and Yang, Xiaokang},
  title     = {Perceiving and Acting in First-Person: A Dataset and Benchmark for Egocentric Human-Object-Human Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12535--12548},
}
G-DexGrasp: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation: Juntao Jian,

Xiuping Liu,

Zixuan Chen,

Manyi Li,

Jian Liu,

Ruizhen Hu; [pdf] [supp]
[bibtex]
@InProceedings{Jian_2025_ICCV,
  author    = {Jian, Juntao and Liu, Xiuping and Chen, Zixuan and Li, Manyi and Liu, Jian and Hu, Ruizhen},
  title     = {{G-DexGrasp}: Generalizable Dexterous Grasping Synthesis Via Part-Aware Prior Retrieval and Prior-Assisted Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11447--11457},
}
Autoregressive Denoising Score Matching is a Good Video Anomaly Detector: Hanwen Zhang,

Congqi Cao,

Qinyi Lv,

Lingtong Min,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hanwen and Cao, Congqi and Lv, Qinyi and Min, Lingtong and Zhang, Yanning},
  title     = {Autoregressive Denoising Score Matching is a Good Video Anomaly Detector},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12057--12067},
}
ConceptSplit: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement: Habin Lim,

Yeongseob Won,

Juwon Seo,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2025_ICCV,
  author    = {Lim, Habin and Won, Yeongseob and Seo, Juwon and Park, Gyeong-Moon},
  title     = {{ConceptSplit}: Decoupled Multi-Concept Personalization of Diffusion Models via Token-wise Adaptation and Attention Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18421--18430},
}
Consistency Trajectory Matching for One-Step Generative Super-Resolution: Weiyi You,

Mingyang Zhang,

Leheng Zhang,

Xingyu Zhou,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Weiyi and Zhang, Mingyang and Zhang, Leheng and Zhou, Xingyu and Shi, Kexuan and Gu, Shuhang},
  title     = {Consistency Trajectory Matching for One-Step Generative Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12747--12756},
}
SpectralAR: Spectral Autoregressive Visual Generation: Yuanhui Huang,

Weiliang Chen,

Wenzhao Zheng,

Yueqi Duan,

Jie Zhou,

Jiwen Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yuanhui and Chen, Weiliang and Zheng, Wenzhao and Duan, Yueqi and Zhou, Jie and Lu, Jiwen},
  title     = {{SpectralAR}: Spectral Autoregressive Visual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15842--15852},
}
Punching Bag vs. Punching Person: Motion Transferability in Videos: Raiyaan Abdullah,

Jared Claypoole,

Michael Cogswell,

Ajay Divakaran,

Yogesh Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abdullah_2025_ICCV,
  author    = {Abdullah, Raiyaan and Claypoole, Jared and Cogswell, Michael and Divakaran, Ajay and Rawat, Yogesh},
  title     = {Punching Bag vs. Punching Person: Motion Transferability in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11348--11358},
}
LOTA: Bit-Planes Guided AI-Generated Image Detection: Hongsong Wang,

Renxi Cheng,

Yang Zhang,

Chaolei Han,

Jie Gui; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongsong and Cheng, Renxi and Zhang, Yang and Han, Chaolei and Gui, Jie},
  title     = {{LOTA}: Bit-Planes Guided {AI}-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17246--17255},
}
Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts: Viet Nguyen,

Anh Nguyen,

Trung Dao,

Khoi Nguyen,

Cuong Pham,

Toan Tran,

Anh Tran; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Viet and Nguyen, Anh and Dao, Trung and Nguyen, Khoi and Pham, Cuong and Tran, Toan and Tran, Anh},
  title     = {Supercharged One-step Text-to-Image Diffusion Models with Negative Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18004--18013},
}
IMG: Calibrating Diffusion Models via Implicit Multimodal Guidance: Jiayi Guo,

Chuanhao Yan,

Xingqian Xu,

Yulin Wang,

Kai Wang,

Gao Huang,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jiayi and Yan, Chuanhao and Xu, Xingqian and Wang, Yulin and Wang, Kai and Huang, Gao and Shi, Humphrey},
  title     = {{IMG}: Calibrating Diffusion Models via Implicit Multimodal Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16079--16089},
}
PUMPS: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks: Clinton Ansun Mo,

Kun Hu,

Chengjiang Long,

Dong Yuan,

Wan-Chi Siu,

Zhiyong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2025_ICCV,
  author    = {Mo, Clinton Ansun and Hu, Kun and Long, Chengjiang and Yuan, Dong and Siu, Wan-Chi and Wang, Zhiyong},
  title     = {{PUMPS}: Skeleton-Agnostic Point-based Universal Motion Pre-Training for Synthesis in Human Motion Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14496--14506},
}
When and Where do Data Poisons Attack Textual Inversion?: Jeremy Styborski,

Mingzhi Lyu,

Jiayou Lu,

Nupur Kapur,

Adams Wai-Kin Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Styborski_2025_ICCV,
  author    = {Styborski, Jeremy and Lyu, Mingzhi and Lu, Jiayou and Kapur, Nupur and Kong, Adams Wai-Kin},
  title     = {When and Where do Data Poisons Attack Textual Inversion?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19439--19449},
}
RAGD: Regional-Aware Diffusion Model for Text-to-Image Generation: Zhennan Chen,

Yajie Li,

Haofan Wang,

Zhibo Chen,

Zhengkai Jiang,

Jun Li,

Qian Wang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhennan and Li, Yajie and Wang, Haofan and Chen, Zhibo and Jiang, Zhengkai and Li, Jun and Wang, Qian and Yang, Jian and Tai, Ying},
  title     = {{RAGD}: Regional-Aware Diffusion Model for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19331--19341},
}
From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition: Ling Lo,

Kelvin C.K. Chan,

Wen-Huang Cheng,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lo_2025_ICCV,
  author    = {Lo, Ling and Chan, Kelvin C. K. and Cheng, Wen-Huang and Yang, Ming-Hsuan},
  title     = {From Prompt to Progression: Taming Video Diffusion Models for Seamless Attribute Transition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18651--18660},
}
GlassWizard: Harvesting Diffusion Priors for Glass Surface Detection: Wenxue Li,

Tian Ye,

Xinyu Xiong,

Jinbin Bai,

Feilong Tang,

Wenxuan Song,

Zhaohu Xing,

Lie Ju,

Guanbin Li,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Wenxue and Ye, Tian and Xiong, Xinyu and Bai, Jinbin and Tang, Feilong and Song, Wenxuan and Xing, Zhaohu and Ju, Lie and Li, Guanbin and Zhu, Lei},
  title     = {{GlassWizard}: Harvesting Diffusion Priors for Glass Surface Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17848--17858},
}
Cross-Subject Mind Decoding from Inaccurate Representations: Yangyang Xu,

Bangzhen Liu,

Wenqi Shao,

Yong Du,

Shengfeng He,

Tingting Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yangyang and Liu, Bangzhen and Shao, Wenqi and Du, Yong and He, Shengfeng and Zhu, Tingting},
  title     = {Cross-Subject Mind Decoding from Inaccurate Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15066--15075},
}
ImageGen-CoT: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning: Jiaqi Liao,

Zhengyuan Yang,

Linjie Li,

Dianqi Li,

Kevin Lin,

Yu Cheng,

Lijuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Jiaqi and Yang, Zhengyuan and Li, Linjie and Li, Dianqi and Lin, Kevin and Cheng, Yu and Wang, Lijuan},
  title     = {{ImageGen-CoT}: Enhancing Text-to-Image In-context Learning with Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17214--17223},
}
Auto-Regressive Transformation for Image Alignment: Kanggeon Lee,

Soochahn Lee,

Kyoung Mu Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Kanggeon and Lee, Soochahn and Lee, Kyoung Mu},
  title     = {Auto-Regressive Transformation for Image Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13569--13579},
}
Stable Virtual Camera: Generative View Synthesis with Diffusion Models: Jensen Zhou,

Hang Gao,

Vikram Voleti,

Aaryaman Vasishta,

Chun-Han Yao,

Mark Boss,

Philip Torr,

Christian Rupprecht,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Jensen and Gao, Hang and Voleti, Vikram and Vasishta, Aaryaman and Yao, Chun-Han and Boss, Mark and Torr, Philip and Rupprecht, Christian and Jampani, Varun},
  title     = {{Stable Virtual Camera}: Generative View Synthesis with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12405--12414},
}
Edicho: Consistent Image Editing in the Wild: Qingyan Bai,

Hao Ouyang,

Yinghao Xu,

Qiuyu Wang,

Ceyuan Yang,

Ka Leong Cheng,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Qingyan and Ouyang, Hao and Xu, Yinghao and Wang, Qiuyu and Yang, Ceyuan and Cheng, Ka Leong and Shen, Yujun and Chen, Qifeng},
  title     = {{Edicho}: Consistent Image Editing in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15277--15287},
}
T2Bs: Text-to-Character Blendshapes via Video Generation: Jiahao Luo,

Chaoyang Wang,

Michael Vasilkovsky,

Vladislav Shakhrai,

Di Liu,

Peiye Zhuang,

Sergey Tulyakov,

Peter Wonka,

Hsin-Ying Lee,

James Davis,

Jian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Jiahao and Wang, Chaoyang and Vasilkovsky, Michael and Shakhrai, Vladislav and Liu, Di and Zhuang, Peiye and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying and Davis, James and Wang, Jian},
  title     = {{T2Bs}: Text-to-Character Blendshapes via Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13625--13637},
}
QuEST: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning: Haoxuan Wang,

Yuzhang Shang,

Zhihang Yuan,

Junyi Wu,

Junchi Yan,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoxuan and Shang, Yuzhang and Yuan, Zhihang and Wu, Junyi and Yan, Junchi and Yan, Yan},
  title     = {{QuEST}: Low-bit Diffusion Model Quantization via Efficient Selective Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15542--15551},
}
Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation: Junyu Xie,

Tengda Han,

Max Bain,

Arsha Nagrani,

Eshika Khandelwal,

Gül Varol,

Weidi Xie,

Andrew Zisserman; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Junyu and Han, Tengda and Bain, Max and Nagrani, Arsha and Khandelwal, Eshika and Varol, G{\"u}l and Xie, Weidi and Zisserman, Andrew},
  title     = {Shot-by-Shot: Film-Grammar-Aware Training-Free Audio Description Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16503--16513},
}
IGD: Instructional Graphic Design with Multimodal Layer Generation: Yadong Qu,

Shancheng Fang,

Yuxin Wang,

Xiaorui Wang,

Zhineng Chen,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
  author    = {Qu, Yadong and Fang, Shancheng and Wang, Yuxin and Wang, Xiaorui and Chen, Zhineng and Xie, Hongtao and Zhang, Yongdong},
  title     = {{IGD}: Instructional Graphic Design with Multimodal Layer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18218--18228},
}
Instruction-based Image Editing with Planning, Reasoning, and Generation: Liya Ji,

Chenyang Qi,

Qifeng Chen; [pdf]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Liya and Qi, Chenyang and Chen, Qifeng},
  title     = {Instruction-based Image Editing with Planning, Reasoning, and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17506--17515},
}
Enhancing Mamba Decoder with Bidirectional Interaction in Multi-Task Dense Prediction: Mang Cao,

Sanping Zhou,

Yizhe Li,

Ye Deng,

Wenli Huang,

Le Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Mang and Zhou, Sanping and Li, Yizhe and Deng, Ye and Huang, Wenli and Wang, Le},
  title     = {Enhancing {Mamba} Decoder with Bidirectional Interaction in Multi-Task Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18815--18824},
}
FairHuman: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models: Yuxuan Wang,

Tianwei Cao,

Huayu Zhang,

Zhongjiang He,

Kongming Liang,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Cao, Tianwei and Zhang, Huayu and He, Zhongjiang and Liang, Kongming and Ma, Zhanyu},
  title     = {{FairHuman}: Boosting Hand and Face Quality in Human Image Generation with Minimum Potential Delay Fairness in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17046--17055},
}
CODA: Repurposing Continuous VAEs for Discrete Tokenization: Zeyu Liu,

Zanlin Ni,

Yeguo Hua,

Xin Deng,

Xiao Ma,

Cheng Zhong,

Gao Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zeyu and Ni, Zanlin and Hua, Yeguo and Deng, Xin and Ma, Xiao and Zhong, Cheng and Huang, Gao},
  title     = {{CODA}: Repurposing Continuous {VAEs} for Discrete Tokenization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18906--18916},
}
GeoAvatar: Adaptive Geometrical Gaussian Splatting for 3D Head Avatar: SeungJun Moon,

Hah Min Lew,

Seungeun Lee,

Ji-Su Kang,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, SeungJun and Lew, Hah Min and Lee, Seungeun and Kang, Ji-Su and Park, Gyeong-Moon},
  title     = {{GeoAvatar}: Adaptive Geometrical {Gaussian} Splatting for {3D} Head Avatar},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12811--12821},
}
Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing: Seungjin Jung,

Kanghee Lee,

Yonghyun Jeong,

Haeun Noh,

Jungmin Lee,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Seungjin and Lee, Kanghee and Jeong, Yonghyun and Noh, Haeun and Lee, Jungmin and Choi, Jongwon},
  title     = {Group-wise Scaling and Orthogonal Decomposition for Domain-Invariant Feature Extraction in Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13372--13381},
}
SEGA: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior: Haoran Wang,

Bo Zhao,

Jinghui Wang,

Hanzhang Wang,

Huan Yang,

Wei Ji,

Hao Liu,

Xinyan Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haoran and Zhao, Bo and Wang, Jinghui and Wang, Hanzhang and Yang, Huan and Ji, Wei and Liu, Hao and Xiao, Xinyan},
  title     = {{SEGA}: A Stepwise Evolution Paradigm for Content-Aware Layout Generation with Design Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19321--19330},
}
Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction: Giuseppe Cartella,

Vittorio Cuculo,

Alessandro D'Amelio,

Marcella Cornia,

Giuseppe Boccignone,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cartella_2025_ICCV,
  author    = {Cartella, Giuseppe and Cuculo, Vittorio and D'Amelio, Alessandro and Cornia, Marcella and Boccignone, Giuseppe and Cucchiara, Rita},
  title     = {Modeling Human Gaze Behavior with Diffusion Models for Unified Scanpath Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16206--16216},
}
Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions: Yuanhong Zheng,

Ruixuan Yu,

Jian Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Yuanhong and Yu, Ruixuan and Sun, Jian},
  title     = {Efficient Multi-Person Motion Prediction by Lightweight Spatial and Temporal Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10844--10853},
}
I2V3D: Controllable Image-to-video Generation with 3D Guidance: Zhiyuan Zhang,

Dongdong Chen,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zhiyuan and Chen, Dongdong and Liao, Jing},
  title     = {{I2V3D}: Controllable Image-to-video Generation with {3D} Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13360--13371},
}
TAG-WM: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity: Yuzhuo Chen,

Zehua Ma,

Han Fang,

Weiming Zhang,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuzhuo and Ma, Zehua and Fang, Han and Zhang, Weiming and Yu, Nenghai},
  title     = {{TAG-WM}: Tamper-Aware Generative Image Watermarking via Diffusion Inversion Sensitivity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16723--16732},
}
IMoRe: Implicit Program-Guided Reasoning for Human Motion Q&A: Chen Li,

Chinthani Sugandhika,

Yeo Keat Ee,

Eric Peh,

Hao Zhang,

Hong Yang,

Deepu Rajan,

Basura Fernando; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Chen and Sugandhika, Chinthani and Ee, Yeo Keat and Peh, Eric and Zhang, Hao and Yang, Hong and Rajan, Deepu and Fernando, Basura},
  title     = {{IMoRe}: Implicit Program-Guided Reasoning for Human Motion {Q\&A}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12987--12996},
}
Fine-Grained 3D Gaussian Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration: Yuan Sun,

Xuan Wang,

Cong Wang,

WeiLi Zhang,

Yanbo Fan,

Yu Guo,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Yuan and Wang, Xuan and Wang, Cong and Zhang, WeiLi and Fan, Yanbo and Guo, Yu and Wang, Fei},
  title     = {Fine-Grained {3D} {Gaussian} Head Avatars Modeling from Static Captures via Joint Reconstruction and Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14293--14304},
}
DynamicFace: High-Quality and Consistent Face Swapping for Image and Video using Composable 3D Facial Priors: Runqi Wang,

Yang Chen,

Sijie Xu,

Tianyao He,

Wei Zhu,

Dejia Song,

Nemo Chen,

Xu Tang,

Yao Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Runqi and Chen, Yang and Xu, Sijie and He, Tianyao and Zhu, Wei and Song, Dejia and Chen, Nemo and Tang, Xu and Hu, Yao},
  title     = {{DynamicFace}: High-Quality and Consistent Face Swapping for Image and Video using Composable {3D} Facial Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13438--13447},
}
Recognizing Actions from Robotic View for Natural Human-Robot Interaction: Ziyi Wang,

Peiming Li,

Hong Liu,

Zhichao Deng,

Can Wang,

Jun Liu,

Junsong Yuan,

Mengyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ziyi and Li, Peiming and Liu, Hong and Deng, Zhichao and Wang, Can and Liu, Jun and Yuan, Junsong and Liu, Mengyuan},
  title     = {Recognizing Actions from Robotic View for Natural Human-Robot Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14218--14227},
}
SAGI: Semantically Aligned and Uncertainty Guided AI Image Inpainting: Paschalis Giakoumoglou,

Dimitrios Karageorgiou,

Symeon Papadopoulos,

Panagiotis C. Petrantonakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Giakoumoglou_2025_ICCV,
  author    = {Giakoumoglou, Paschalis and Karageorgiou, Dimitrios and Papadopoulos, Symeon and Petrantonakis, Panagiotis C.},
  title     = {{SAGI}: Semantically Aligned and Uncertainty Guided {AI} Image Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16090--16101},
}
RAGDiffusion: Faithful Cloth Generation via External Knowledge Assimilation: Yuhan Li,

Xianfeng Tan,

Wenxiang Shang,

Yubo Wu,

Jian Wang,

Xuanhong Chen,

Yi Zhang,

Hangcheng Zhu,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuhan and Tan, Xianfeng and Shang, Wenxiang and Wu, Yubo and Wang, Jian and Chen, Xuanhong and Zhang, Yi and Zhu, Hangcheng and Ni, Bingbing},
  title     = {{RAGDiffusion}: Faithful Cloth Generation via External Knowledge Assimilation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17485--17495},
}
Dynamic Typography: Bringing Text to Life via Video Diffusion Prior: Zichen Liu,

Yihao Meng,

Hao Ouyang,

Yue Yu,

Bolin Zhao,

Daniel Cohen-Or,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zichen and Meng, Yihao and Ouyang, Hao and Yu, Yue and Zhao, Bolin and Cohen-Or, Daniel and Qu, Huamin},
  title     = {Dynamic Typography: Bringing Text to Life via Video Diffusion Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14787--14797},
}
Occlusion-robust Stylization for Drawing-based 3D Animation: Sunjae Yoon,

Gwanhyeong Koo,

Younghwan Lee,

Ji Woo Hong,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2025_ICCV,
  author    = {Yoon, Sunjae and Koo, Gwanhyeong and Lee, Younghwan and Hong, Ji Woo and Yoo, Chang D.},
  title     = {Occlusion-robust Stylization for Drawing-based {3D} Animation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12263--12273},
}
Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification: Zhiqi Pang,

Chunyu Wang,

Lingling Zhao,

Junjie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pang_2025_ICCV,
  author    = {Pang, Zhiqi and Wang, Chunyu and Zhao, Lingling and Wang, Junjie},
  title     = {Augmented and Softened Matching for Unsupervised Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11100--11109},
}
StableCodec: Taming One-Step Diffusion for Extreme Image Compression: Tianyu Zhang,

Xin Luo,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tianyu and Luo, Xin and Li, Li and Liu, Dong},
  title     = {{StableCodec}: Taming One-Step Diffusion for Extreme Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17379--17389},
}
MVTrajecter: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost: Taiga Yamane,

Ryo Masumura,

Satoshi Suzuki,

Shota Orihashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamane_2025_ICCV,
  author    = {Yamane, Taiga and Masumura, Ryo and Suzuki, Satoshi and Orihashi, Shota},
  title     = {{MVTrajecter}: Multi-View Pedestrian Tracking with Trajectory Motion Cost and Trajectory Appearance Cost},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13270--13280},
}
Learning Efficient and Generalizable Human Representation with Human Gaussian Model: Yifan Liu,

Shengjun Zhang,

Chensheng Dai,

Yang Chen,

Hao Liu,

Chen Li,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yifan and Zhang, Shengjun and Dai, Chensheng and Chen, Yang and Liu, Hao and Li, Chen and Duan, Yueqi},
  title     = {Learning Efficient and Generalizable Human Representation with Human {Gaussian} Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11797--11806},
}
DexH2R: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover: Youzhuo Wang,

Jiayi Ye,

Chuyang Xiao,

Yiming Zhong,

Heng Tao,

Hang Yu,

Yumeng Liu,

Jingyi Yu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Youzhuo and Ye, Jiayi and Xiao, Chuyang and Zhong, Yiming and Tao, Heng and Yu, Hang and Liu, Yumeng and Yu, Jingyi and Ma, Yuexin},
  title     = {{DexH2R}: A Benchmark for Dynamic Dexterous Grasping in Human-to-Robot Handover},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12702--12712},
}
Mobile Video Diffusion: Haitam Ben Yahia,

Denis Korzhenkov,

Ioannis Lelekas,

Amir Ghodrati,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Yahia_2025_ICCV,
  author    = {Ben Yahia, Haitam and Korzhenkov, Denis and Lelekas, Ioannis and Ghodrati, Amir and Habibian, Amirhossein},
  title     = {Mobile Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19450--19460},
}
Towards Human-like Virtual Beings: Simulating Human Behavior in 3D Scenes: Chen Liang,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Chen and Wang, Wenguan and Yang, Yi},
  title     = {Towards Human-like Virtual Beings: Simulating Human Behavior in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10753--10763},
}
HiGarment: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image: Junyi Guo,

Jingxuan Zhang,

Fangyu Wu,

Huanda Lu,

Qiufeng Wang,

Wenmian Yang,

Eng Gee Lim,

Dongming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Junyi and Zhang, Jingxuan and Wu, Fangyu and Lu, Huanda and Wang, Qiufeng and Yang, Wenmian and Lim, Eng Gee and Lu, Dongming},
  title     = {{HiGarment}: Cross-modal Harmony Based Diffusion Model for Flat Sketch to Realistic Garment Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18542--18551},
}
Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling: Chao Zhou,

Tianyi Wei,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Chao and Wei, Tianyi and Yu, Nenghai},
  title     = {Scale Your Instructions: Enhance the Instruction-Following Fidelity of Unified Image Generation Model by Self-Adaptive Attention Scaling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15171--15181},
}
From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning: Sen Wang,

Shao Zeng,

Tianjun Gu,

Zhizhong Zhang,

Ruixin Zhang,

Shouhong Ding,

Jingyun Zhang,

Jun Wang,

Xin Tan,

Yuan Xie,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sen and Zeng, Shao and Gu, Tianjun and Zhang, Zhizhong and Zhang, Ruixin and Ding, Shouhong and Zhang, Jingyun and Wang, Jun and Tan, Xin and Xie, Yuan and Ma, Lizhuang},
  title     = {From Enhancement to Understanding: Build a Generalized Bridge for Low-light Vision via Semantically Consistent Unsupervised Fine-tuning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13804--13814},
}
Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models: Eunseo Koh,

Seunghoo Hong,

Tae-Young Kim,

Simon S. Woo,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koh_2025_ICCV,
  author    = {Koh, Eunseo and Hong, Seunghoo and Kim, Tae-Young and Woo, Simon S. and Heo, Jae-Pil},
  title     = {Translation of Text Embedding via Delta Vector to Suppress Strongly Entangled Content in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15365--15374},
}
Training-free Geometric Image Editing on Diffusion Models: Hanshen Zhu,

Zhen Zhu,

Kaile Zhang,

Yiming Gong,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Hanshen and Zhu, Zhen and Zhang, Kaile and Gong, Yiming and Liu, Yuliang and Bai, Xiang},
  title     = {Training-free Geometric Image Editing on Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19130--19140},
}
Harmonizing Visual Representations for Unified Multimodal Understanding and Generation: Size Wu,

Wenwei Zhang,

Lumin Xu,

Sheng Jin,

Zhonghua Wu,

Qingyi Tao,

Wentao Liu,

Wei Li,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Size and Zhang, Wenwei and Xu, Lumin and Jin, Sheng and Wu, Zhonghua and Tao, Qingyi and Liu, Wentao and Li, Wei and Loy, Chen Change},
  title     = {Harmonizing Visual Representations for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17739--17750},
}
Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need: Yongchuan Cui,

Peng Liu,

Hui Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_ICCV,
  author    = {Cui, Yongchuan and Liu, Peng and Zhang, Hui},
  title     = {Enpowering Your Pansharpening Models with Generalizability: Unified Distribution is All You Need},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11850--11860},
}
KV-Edit: Training-Free Image Editing for Precise Background Preservation: Tianrui Zhu,

Shiyi Zhang,

Jiawei Shao,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Tianrui and Zhang, Shiyi and Shao, Jiawei and Tang, Yansong},
  title     = {{KV-Edit}: Training-Free Image Editing for Precise Background Preservation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16607--16617},
}
Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation: Guanyi Qin,

Ziyue Wang,

Daiyun Shen,

Haofeng Liu,

Hantao Zhou,

Junde Wu,

Runze Hu,

Yueming Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Guanyi and Wang, Ziyue and Shen, Daiyun and Liu, Haofeng and Zhou, Hantao and Wu, Junde and Hu, Runze and Jin, Yueming},
  title     = {Structure Matters: Revisiting Boundary Refinement in Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14431--14442},
}
Hate in Plain Sight: On the Risks of Moderating AI-Generated Hateful Illusions: Yiting Qu,

Ziqing Yang,

Yihan Ma,

Michael Backes,

Savvas Zannettou,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
  author    = {Qu, Yiting and Yang, Ziqing and Ma, Yihan and Backes, Michael and Zannettou, Savvas and Zhang, Yang},
  title     = {Hate in Plain Sight: On the Risks of Moderating {AI}-Generated Hateful Illusions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19617--19627},
}
Steering Guidance for Personalized Text-to-Image Diffusion Models: Sunghyun Park,

Seokeon Choi,

Hyoungwoo Park,

Sungrack Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sunghyun and Choi, Seokeon and Park, Hyoungwoo and Yun, Sungrack},
  title     = {Steering Guidance for Personalized Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15907--15916},
}
Improving Rectified Flow with Boundary Conditions: Xixi Hu,

Runlong Liao,

Keyang Xu,

Bo Liu,

Yeqing Li,

Eugene Ie,

Hongliang Fei,

Qiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xixi and Liao, Runlong and Xu, Keyang and Liu, Bo and Li, Yeqing and Ie, Eugene and Fei, Hongliang and Liu, Qiang},
  title     = {Improving Rectified Flow with Boundary Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18177--18186},
}
Multimodal Prompt Alignment for Facial Expression Recognition: Fuyan Ma,

Yiran He,

Bin Sun,

Shutao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Fuyan and He, Yiran and Sun, Bin and Li, Shutao},
  title     = {Multimodal Prompt Alignment for Facial Expression Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12581--12591},
}
Laboring on less labors: RPCA Paradigm for Pan-sharpening: Honghui Xu,

Chuangjie Fang,

Yibin Wang,

Jie Wu,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Honghui and Fang, Chuangjie and Wang, Yibin and Wu, Jie and Zheng, Jianwei},
  title     = {Laboring on less labors: {RPCA} Paradigm for Pan-sharpening},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11393--11402},
}
MVQA: Mamba with Unified Sampling for Efficient Video Quality Assessment: Yachun Mi,

Yu Li,

Weicheng Meng,

Chaofeng Chen,

Chen Hui,

Shaohui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2025_ICCV,
  author    = {Mi, Yachun and Li, Yu and Meng, Weicheng and Chen, Chaofeng and Hui, Chen and Liu, Shaohui},
  title     = {{MVQA}: {Mamba} with Unified Sampling for Efficient Video Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18498--18509},
}
AV-Flow: Transforming Text to Audio-Visual Human-like Interactions: Aggelina Chatziagapi,

Louis-Philippe Morency,

Hongyu Gong,

Michael Zollhöfer,

Dimitris Samaras,

Alexander Richard; [pdf] [supp]
[bibtex]
@InProceedings{Chatziagapi_2025_ICCV,
  author    = {Chatziagapi, Aggelina and Morency, Louis-Philippe and Gong, Hongyu and Zollh{\"o}fer, Michael and Samaras, Dimitris and Richard, Alexander},
  title     = {{AV-Flow}: Transforming Text to Audio-Visual Human-like Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14270--14282},
}
What we need is explicit controllability: Training 3D gaze estimator using only facial images: Tingwei Li,

Jun Bao,

Zhenzhong Kuang,

Buyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Tingwei and Bao, Jun and Kuang, Zhenzhong and Liu, Buyu},
  title     = {What we need is explicit controllability: Training {3D} gaze estimator using only facial images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11414--11424},
}
WaveMamba: Wavelet-Driven Mamba Fusion for RGB-Infrared Object Detection: Haodong Zhu,

Wenhao Dong,

Linlin Yang,

Hong Li,

Yuguang Yang,

Yangyang Ren,

Qingcheng Zhu,

Zichao Feng,

Changbai Li,

Shaohui Lin,

Runqi Wang,

Xiaoyan Luo,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Haodong and Dong, Wenhao and Yang, Linlin and Li, Hong and Yang, Yuguang and Ren, Yangyang and Zhu, Qingcheng and Feng, Zichao and Li, Changbai and Lin, Shaohui and Wang, Runqi and Luo, Xiaoyan and Zhang, Baochang},
  title     = {{WaveMamba}: Wavelet-Driven {Mamba} Fusion for {RGB}-Infrared Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11219--11229},
}
AU-Blendshape for Fine-grained Stylized 3D Facial Expression Manipulation: Hao Li,

Ju Dai,

Feng Zhou,

Kaida Ning,

Lei Li,

Junjun Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hao and Dai, Ju and Zhou, Feng and Ning, Kaida and Li, Lei and Pan, Junjun},
  title     = {{AU-Blendshape} for Fine-grained Stylized {3D} Facial Expression Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12605--12614},
}
Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis: Peng Zheng,

Junke Wang,

Yi Chang,

Yizhou Yu,

Rui Ma,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Peng and Wang, Junke and Chang, Yi and Yu, Yizhou and Ma, Rui and Wu, Zuxuan},
  title     = {Rethinking Discrete Tokens: Treating Them as Conditions for Continuous Autoregressive Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17390--17400},
}
MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm: Ziyan Guo,

Zeyu Hu,

De Wen Soh,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Ziyan and Hu, Zeyu and Soh, De Wen and Zhao, Na},
  title     = {{MotionLab}: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13869--13879},
}
DisCoRD: Discrete Tokens to Continuous Motion via Rectified Flow Decoding: Jungbin Cho,

Junwan Kim,

Jisoo Kim,

Minseo Kim,

Mingu Kang,

Sungeun Hong,

Tae-Hyun Oh,

Youngjae Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Jungbin and Kim, Junwan and Kim, Jisoo and Kim, Minseo and Kang, Mingu and Hong, Sungeun and Oh, Tae-Hyun and Yu, Youngjae},
  title     = {{DisCoRD}: Discrete Tokens to Continuous Motion via Rectified Flow Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14602--14612},
}
TREAD: Token Routing for Efficient Architecture-agnostic Diffusion Training: Felix Krause,

Timy Phan,

Ming Gui,

Stefan Andreas Baumann,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krause_2025_ICCV,
  author    = {Krause, Felix and Phan, Timy and Gui, Ming and Baumann, Stefan Andreas and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {{TREAD}: Token Routing for Efficient Architecture-agnostic Diffusion Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15703--15713},
}
Controllable Weather Synthesis and Removal with Video Diffusion Models: Chih-Hao Lin,

Zian Wang,

Ruofan Liang,

Yuxuan Zhang,

Sanja Fidler,

Shenlong Wang,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chih-Hao and Wang, Zian and Liang, Ruofan and Zhang, Yuxuan and Fidler, Sanja and Wang, Shenlong and Gojcic, Zan},
  title     = {Controllable Weather Synthesis and Removal with Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13580--13591},
}
Clink! Chop! Thud! - Learning Object Sounds from Real-World Interactions: Mengyu Yang,

Yiming Chen,

Haozheng Pei,

Siddhant Agarwal,

Arun Balajee Vasudevan,

James Hays; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Mengyu and Chen, Yiming and Pei, Haozheng and Agarwal, Siddhant and Vasudevan, Arun Balajee and Hays, James},
  title     = {Clink! {Chop}! {Thud}! - Learning Object Sounds from Real-World Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14549--14558},
}
FaceShield: Defending Facial Image against Deepfake Threats: Jaehwan Jeong,

Sumin In,

Sieun Kim,

Hannie Shin,

Jongheon Jeong,

Sang Ho Yoon,

Jaewook Chung,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV,
  author    = {Jeong, Jaehwan and In, Sumin and Kim, Sieun and Shin, Hannie and Jeong, Jongheon and Yoon, Sang Ho and Chung, Jaewook and Kim, Sangpil},
  title     = {{FaceShield}: Defending Facial Image against Deepfake Threats},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10364--10374},
}
SEAL: Semantic Aware Image Watermarking: Kasra Arabi,

R. Teal Witter,

Chinmay Hegde,

Niv Cohen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Arabi_2025_ICCV,
  author    = {Arabi, Kasra and Witter, R. Teal and Hegde, Chinmay and Cohen, Niv},
  title     = {{SEAL}: Semantic Aware Image Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16196--16205},
}
Forecasting Continuous Non-Conservative Dynamical Systems in SO(3): Lennart Bastian,

Mohammad Rashed,

Nassir Navab,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bastian_2025_ICCV,
  author    = {Bastian, Lennart and Rashed, Mohammad and Navab, Nassir and Birdal, Tolga},
  title     = {Forecasting Continuous Non-Conservative Dynamical Systems in {SO(3)}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14845--14855},
}
TextSSR: Diffusion-based Data Synthesis for Scene Text Recognition: Xingsong Ye,

Yongkun Du,

Yunbo Tao,

Zhineng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Xingsong and Du, Yongkun and Tao, Yunbo and Chen, Zhineng},
  title     = {{TextSSR}: Diffusion-based Data Synthesis for Scene Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {17464--17473},
}
Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues: Francesco Taioli,

Edoardo Zorzi,

Gianni Franchi,

Alberto Castellini,

Alessandro Farinelli,

Marco Cristani,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taioli_2025_ICCV,
  author    = {Taioli, Francesco and Zorzi, Edoardo and Franchi, Gianni and Castellini, Alberto and Farinelli, Alessandro and Cristani, Marco and Wang, Yiming},
  title     = {Collaborative Instance Object Navigation: Leveraging Uncertainty-Awareness to Minimize Human-Agent Dialogues},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18781--18792},
}
TITAN-Guide: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models: Christian Simon,

Masato Ishii,

Akio Hayakawa,

Zhi Zhong,

Shusuke Takahashi,

Takashi Shibuya,

Yuki Mitsufuji; [pdf] [supp]
[bibtex]
@InProceedings{Simon_2025_ICCV,
  author    = {Simon, Christian and Ishii, Masato and Hayakawa, Akio and Zhong, Zhi and Takahashi, Shusuke and Shibuya, Takashi and Mitsufuji, Yuki},
  title     = {{TITAN-Guide}: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16662--16671},
}
ART: Adaptive Relation Tuning for Generalized Relation Prediction: Gopika Sudhakaran,

Hikaru Shindo,

Patrick Schramowski,

Simone Schaub-Meyer,

Kristian Kersting,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sudhakaran_2025_ICCV,
  author    = {Sudhakaran, Gopika and Shindo, Hikaru and Schramowski, Patrick and Schaub-Meyer, Simone and Kersting, Kristian and Roth, Stefan},
  title     = {{ART}: Adaptive Relation Tuning for Generalized Relation Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16323--16332},
}
DimensionX: Create Any 3D and 4D Scenes from a Single Image with Decoupled Video Diffusion: Wenqiang Sun,

Shuo Chen,

Fangfu Liu,

Zilong Chen,

Yueqi Duan,

Jun Zhu,

Jun Zhang,

Yikai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Wenqiang and Chen, Shuo and Liu, Fangfu and Chen, Zilong and Duan, Yueqi and Zhu, Jun and Zhang, Jun and Wang, Yikai},
  title     = {{DimensionX}: Create Any {3D} and {4D} Scenes from a Single Image with Decoupled Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13695--13706},
}
SparseFlex: High-Resolution and Arbitrary-Topology 3D Shape Modeling: Xianglong He,

Zi-Xin Zou,

Chia-Hao Chen,

Yuan-Chen Guo,

Ding Liang,

Chun Yuan,

Wanli Ouyang,

Yan-Pei Cao,

Yangguang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Xianglong and Zou, Zi-Xin and Chen, Chia-Hao and Guo, Yuan-Chen and Liang, Ding and Yuan, Chun and Ouyang, Wanli and Cao, Yan-Pei and Li, Yangguang},
  title     = {{SparseFlex}: High-Resolution and Arbitrary-Topology {3D} Shape Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14822--14833},
}
LoRA.rar: Learning to Merge LoRAs via Hypernetworks for Subject-Style Conditioned Image Generation: Donald Shenaj,

Ondrej Bohdal,

Mete Ozay,

Pietro Zanuttigh,

Umberto Michieli; [pdf] [supp]
[bibtex]
@InProceedings{Shenaj_2025_ICCV,
  author    = {Shenaj, Donald and Bohdal, Ondrej and Ozay, Mete and Zanuttigh, Pietro and Michieli, Umberto},
  title     = {{LoRA.rar}: Learning to Merge {LoRAs} via Hypernetworks for Subject-Style Conditioned Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {16132--16142},
}
Motion Synthesis with Sparse and Flexible Keyjoint Control: Inwoo Hwang,

Jinseok Bae,

Donggeun Lim,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inwoo and Bae, Jinseok and Lim, Donggeun and Kim, Young Min},
  title     = {Motion Synthesis with Sparse and Flexible Keyjoint Control},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13203--13213},
}
EVOLVE: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation: Jong-Hyeon Baek,

Jiwon Oh,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Baek_2025_ICCV,
  author    = {Baek, Jong-Hyeon and Oh, Jiwon and Koh, Yeong Jun},
  title     = {{EVOLVE}: Event-Guided Deformable Feature Transfer and Dual-Memory Refinement for Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11273--11282},
}
DiTaiListener: Controllable High Fidelity Listener Video Generation with Diffusion: Maksim Siniukov,

Di Chang,

Minh Tran,

Hongkun Gong,

Ashutosh Chaubey,

Mohammad Soleymani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Siniukov_2025_ICCV,
  author    = {Siniukov, Maksim and Chang, Di and Tran, Minh and Gong, Hongkun and Chaubey, Ashutosh and Soleymani, Mohammad},
  title     = {{DiTaiListener}: Controllable High Fidelity Listener Video Generation with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11991--12001},
}
ISP2HRNet: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning: Yuanlin Wang,

Ruiqin Xiong,

Rui Zhao,

Jin Wang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Zhao, Rui and Wang, Jin and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{ISP2HRNet}: Learning to Reconstruct High Resolution Image from Irregularly Sampled Pixels via Hierarchical Gradient Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11547--11557},
}
Light-A-Video: Training-free Video Relighting via Progressive Light Fusion: Yujie Zhou,

Jiazi Bu,

Pengyang Ling,

Pan Zhang,

Tong Wu,

Qidong Huang,

Jinsong Li,

Xiaoyi Dong,

Yuhang Zang,

Yuhang Cao,

Anyi Rao,

Jiaqi Wang,

Li Niu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yujie and Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Huang, Qidong and Li, Jinsong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Rao, Anyi and Wang, Jiaqi and Niu, Li},
  title     = {{Light-A-Video}: Training-free Video Relighting via Progressive Light Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13315--13325},
}
Deterministic Object Pose Confidence Region Estimation: Jinghao Wang,

Zhang Li,

Zi Wang,

Banglei Guan,

Yang Shang,

Qifeng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jinghao and Li, Zhang and Wang, Zi and Guan, Banglei and Shang, Yang and Yu, Qifeng},
  title     = {Deterministic Object Pose Confidence Region Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14866--14875},
}
MedVSR: Medical Video Super-Resolution with Cross State-Space Propagation: Xinyu Liu,

Guolei Sun,

Cheng Wang,

Yixuan Yuan,

Ender Konukoglu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xinyu and Sun, Guolei and Wang, Cheng and Yuan, Yixuan and Konukoglu, Ender},
  title     = {{MedVSR}: Medical Video Super-Resolution with Cross State-Space Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11697--11707},
}
Di[M]O: Distilling Masked Diffusion Models into One-step Generator: Yuanzhi Zhu,

Xi Wang,

Stéphane Lathuilière,

Vicky Kalogeiton; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yuanzhi and Wang, Xi and Lathuili{\`e}re, St{\'e}phane and Kalogeiton, Vicky},
  title     = {{Di[M]O}: Distilling Masked Diffusion Models into One-step Generator},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18606--18618},
}
Q-Norm: Robust Representation Learning via Quality-Adaptive Normalization: Lanning Zhang,

Ying Zhou,

Fei Gao,

Ziyun Li,

Maoying Qiao,

Jinlan Xu,

Nannan Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Lanning and Zhou, Ying and Gao, Fei and Li, Ziyun and Qiao, Maoying and Xu, Jinlan and Wang, Nannan}, title = {Q-Norm: Robust Representation Learning via Quality-Adaptive Normalization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13901-13911} }
VoiceCraft-Dub: Automated Video Dubbing with Neural Codec Language Models: Kim Sung-Bin,

Jeongsoo Choi,

Puyuan Peng,

Joon Son Chung,

Tae-Hyun Oh,

David Harwath; [pdf] [supp]
[bibtex]
@InProceedings{Sung-Bin_2025_ICCV, author = {Sung-Bin, Kim and Choi, Jeongsoo and Peng, Puyuan and Chung, Joon Son and Oh, Tae-Hyun and Harwath, David}, title = {VoiceCraft-Dub: Automated Video Dubbing with Neural Codec Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14623-14632} }
SUV: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings: Xiang Lv,

Mingwen Shao,

Lingzhuang Meng,

Chang Liu,

Yecong Wan,

Xinyuan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lv_2025_ICCV, author = {Lv, Xiang and Shao, Mingwen and Meng, Lingzhuang and Liu, Chang and Wan, Yecong and Chen, Xinyuan}, title = {SUV: Suppressing Undesired Video Content via Semantic Modulation Based on Text Embeddings}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18357-18366} }
FonTS: Text Rendering With Typography and Style Controls: Wenda Shi,

Yiren Song,

Dengming Zhang,

Jiaming Liu,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV, author = {Shi, Wenda and Song, Yiren and Zhang, Dengming and Liu, Jiaming and Zou, Xingxing}, title = {FonTS: Text Rendering With Typography and Style Controls}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18463-18474} }
PRIMAL: Physically Reactive and Interactive Motor Model for Avatar Learning: Yan Zhang,

Yao Feng,

Alpár Cseke,

Nitin Saini,

Nathan Bajandas,

Nicolas Heron,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yan and Feng, Yao and Cseke, Alp\'ar and Saini, Nitin and Bajandas, Nathan and Heron, Nicolas and Black, Michael J.}, title = {PRIMAL: Physically Reactive and Interactive Motor Model for Avatar Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12725-12736} }
HOMO-Feature: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation: Chenzhong Gao,

Wei Li,

Desheng Weng; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV, author = {Gao, Chenzhong and Li, Wei and Weng, Desheng}, title = {HOMO-Feature: Cross-Arbitrary-Modal Image Matching with Homomorphism of Organized Major Orientation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10538-10548} }
AFUNet: Cross-Iterative Alignment-Fusion Synergy for HDR Reconstruction via Deep Unfolding Paradigm: Xinyue Li,

Zhangkai Ni,

Wenhan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Xinyue and Ni, Zhangkai and Yang, Wenhan}, title = {AFUNet: Cross-Iterative Alignment-Fusion Synergy for HDR Reconstruction via Deep Unfolding Paradigm}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10666-10675} }
DiT4SR: Taming Diffusion Transformer for Real-World Image Super-Resolution: Zheng-Peng Duan,

Jiawei Zhang,

Xin Jin,

Ziheng Zhang,

Zheng Xiong,

Dongqing Zou,

Jimmy S. Ren,

Chunle Guo,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_ICCV, author = {Duan, Zheng-Peng and Zhang, Jiawei and Jin, Xin and Zhang, Ziheng and Xiong, Zheng and Zou, Dongqing and Ren, Jimmy S. and Guo, Chunle and Li, Chongyi}, title = {DiT4SR: Taming Diffusion Transformer for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18948-18958} }
Video Individual Counting for Moving Drones: Yaowu Fan,

Jia Wan,

Tao Han,

Antoni B. Chan,

Andy J. Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV, author = {Fan, Yaowu and Wan, Jia and Han, Tao and Chan, Antoni B. and Ma, Andy J.}, title = {Video Individual Counting for Moving Drones}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12284-12293} }
Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition: Yandan Wang,

Chenqi Guo,

Yinglong Ma,

Jiangyan Chen,

Yuan Gao,

Weiming Dong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Yandan and Guo, Chenqi and Ma, Yinglong and Chen, Jiangyan and Gao, Yuan and Dong, Weiming}, title = {Bridging Class Imbalance and Partial Labeling via Spectral-Balanced Energy Propagation for Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10162-10172} }
ARIG: Autoregressive Interactive Head Generation for Real-time Conversations: Ying Guo,

Xi Liu,

Cheng Zhen,

Pengfei Yan,

Xiaoming Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_ICCV, author = {Guo, Ying and Liu, Xi and Zhen, Cheng and Yan, Pengfei and Wei, Xiaoming}, title = {ARIG: Autoregressive Interactive Head Generation for Real-time Conversations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12956-12965} }
Text-IRSTD: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes: Feng Huang,

Shuyuan Zheng,

Zhaobing Qiu,

Huanxian Liu,

Huanxin Bai,

Liqiong Chen; [pdf]
[bibtex]
@InProceedings{Huang_2025_ICCV, author = {Huang, Feng and Zheng, Shuyuan and Qiu, Zhaobing and Liu, Huanxian and Bai, Huanxin and Chen, Liqiong}, title = {Text-IRSTD: Leveraging Semantic Text to Promote Infrared Small Target Detection in Complex Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10635-10644} }
Dual-Process Image Generation: Grace Luo,

Jonathan Granskog,

Aleksander Holynski,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV, author = {Luo, Grace and Granskog, Jonathan and Holynski, Aleksander and Darrell, Trevor}, title = {Dual-Process Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17972-17983} }
TrackVerse: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning: Yibing Wei,

Samuel Church,

Victor Suciu,

Jinhong Lin,

Cheng-En Wu,

Pedro Morgado; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV, author = {Wei, Yibing and Church, Samuel and Suciu, Victor and Lin, Jinhong and Wu, Cheng-En and Morgado, Pedro}, title = {TrackVerse: A Large-Scale Object-Centric Video Dataset for Image-Level Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11153-11163} }
HumanSAM: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly: Chang Liu,

Yunfan Ye,

Fan Zhang,

Qingyang Zhou,

Yuchuan Luo,

Zhiping Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Chang and Ye, Yunfan and Zhang, Fan and Zhou, Qingyang and Luo, Yuchuan and Cai, Zhiping}, title = {HumanSAM: Classifying Human-centric Forgery Videos in Human Spatial, Appearance, and Motion Anomaly}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14028-14038} }
An Information-Theoretic Regularizer for Lossy Neural Image Compression: Yingwen Zhang,

Meng Wang,

Xihua Sheng,

Peilin Chen,

Junru Li,

Li Zhang,

Shiqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yingwen and Wang, Meng and Sheng, Xihua and Chen, Peilin and Li, Junru and Zhang, Li and Wang, Shiqi}, title = {An Information-Theoretic Regularizer for Lossy Neural Image Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15573-15582} }
DC-AE 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space: Junyu Chen,

Dongyun Zou,

Wenkun He,

Junsong Chen,

Enze Xie,

Song Han,

Han Cai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Junyu and Zou, Dongyun and He, Wenkun and Chen, Junsong and Xie, Enze and Han, Song and Cai, Han}, title = {DC-AE 1.5: Accelerating Diffusion Model Convergence with Structured Latent Space}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19628-19637} }
V2M4: 4D Mesh Animation Reconstruction from a Single Monocular Video: Jianqi Chen,

Biao Zhang,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter}, title = {V2M4: 4D Mesh Animation Reconstruction from a Single Monocular Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11643-11653} }
MUSE: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion: Fei Peng,

Junqiang Wu,

Yan Li,

Tingting Gao,

Di Zhang,

Huiyuan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV, author = {Peng, Fei and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Fu, Huiyuan}, title = {MUSE: Multi-Subject Unified Synthesis via Explicit Layout Semantic Expansion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15885-15895} }
ESSENTIAL: Episodic and Semantic Memory Integration for Video Class-Incremental Learning: Jongseo Lee,

Kyungho Bae,

Kyle Min,

Gyeong-Moon Park,

Jinwoo Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV, author = {Lee, Jongseo and Bae, Kyungho and Min, Kyle and Park, Gyeong-Moon and Choi, Jinwoo}, title = {ESSENTIAL: Episodic and Semantic Memory Integration for Video Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17546-17556} }
Continual Personalization for Diffusion Models: Yu-Chien Liao,

Jr-Jen Chen,

Chi-Pin Huang,

Ci-Siang Lin,

Meng-Lin Wu,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV, author = {Liao, Yu-Chien and Chen, Jr-Jen and Huang, Chi-Pin and Lin, Ci-Siang and Wu, Meng-Lin and Wang, Yu-Chiang Frank}, title = {Continual Personalization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15511-15520} }
Beyond Perspective: Neural 360-Degree Video Compression: Andy Regensky,

Marc Windsheimer,

Fabian Brand,

Andre Kaup; [pdf] [supp]
[bibtex]
@InProceedings{Regensky_2025_ICCV, author = {Regensky, Andy and Windsheimer, Marc and Brand, Fabian and Kaup, Andre}, title = {Beyond Perspective: Neural 360-Degree Video Compression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16143-16153} }
An Inversion-based Measure of Memorization for Diffusion Models: Zhe Ma,

Qingming Li,

Xuhong Zhang,

Tianyu Du,

Ruixiao Lin,

Zonghui Wang,

Shouling Ji,

Wenzhi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV, author = {Ma, Zhe and Li, Qingming and Zhang, Xuhong and Du, Tianyu and Lin, Ruixiao and Wang, Zonghui and Ji, Shouling and Chen, Wenzhi}, title = {An Inversion-based Measure of Memorization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16959-16969} }
TimeBooth: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging: Zepeng Su,

Zhulin Liu,

Zongyan Zhang,

Tong Zhang,

C.L.Philip Chen; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV, author = {Su, Zepeng and Liu, Zhulin and Zhang, Zongyan and Zhang, Tong and Chen, C.L.Philip}, title = {TimeBooth: Disentangled Facial Invariant Representation for Diverse and Personalized Face Aging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12147-12157} }
Training-free and Adaptive Sparse Attention for Efficient Long Video Generation: Yifei Xia,

Suhan Ling,

Fangcheng Fu,

Yujie Wang,

Huixia Li,

Xuefeng Xiao,

Bin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV, author = {Xia, Yifei and Ling, Suhan and Fu, Fangcheng and Wang, Yujie and Li, Huixia and Xiao, Xuefeng and Cui, Bin}, title = {Training-free and Adaptive Sparse Attention for Efficient Long Video Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15982-15993} }
Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models: Haoming Cai,

Tsung-Wei Huang,

Shiv Gehlot,

Brandon Y. Feng,

Sachin Shah,

Guan-Ming Su,

Christopher Metzler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV, author = {Cai, Haoming and Huang, Tsung-Wei and Gehlot, Shiv and Feng, Brandon Y. and Shah, Sachin and Su, Guan-Ming and Metzler, Christopher}, title = {Parametric Shadow Control for Portrait Generation in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18207-18217} }
Beyond Next-Token: Next-X Prediction for Autoregressive Visual Generation: Sucheng Ren,

Qihang Yu,

Ju He,

Xiaohui Shen,

Alan Yuille,

Liang-Chieh Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_ICCV, author = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh}, title = {Beyond Next-Token: Next-X Prediction for Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15781-15791} }
FiVE-Bench: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models: Minghan Li,

Chenxi Xie,

Yichen Wu,

Lei Zhang,

Mengyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Minghan and Xie, Chenxi and Wu, Yichen and Zhang, Lei and Wang, Mengyu}, title = {FiVE-Bench: A Fine-grained Video Editing Benchmark for Evaluating Emerging Diffusion and Rectified Flow Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16672-16681} }
Sparse Fine-Tuning of Transformers for Generative Tasks: Wei Chen,

Jingxi Yu,

Zichen Miao,

Qiang Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Wei and Yu, Jingxi and Miao, Zichen and Qiu, Qiang}, title = {Sparse Fine-Tuning of Transformers for Generative Tasks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18703-18713} }
Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models: Jianwei Fei,

Yunshu Dai,

Peipeng Yu,

Zhe Kong,

Jiantao Zhou,

Zhihua Xia; [pdf]
[bibtex]
@InProceedings{Fei_2025_ICCV, author = {Fei, Jianwei and Dai, Yunshu and Yu, Peipeng and Kong, Zhe and Zhou, Jiantao and Xia, Zhihua}, title = {Scalable Dual Fingerprinting for Hierarchical Attribution of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15025-15034} }
UniFuse: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments: Dayong Su,

Yafei Zhang,

Huafeng Li,

Jinxing Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV, author = {Su, Dayong and Zhang, Yafei and Li, Huafeng and Li, Jinxing and Liu, Yu}, title = {UniFuse: A Unified All-in-One Framework for Multi-Modal Medical Image Fusion Under Diverse Degradations and Misalignments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14238-14247} }
MOSCATO: Predicting Multiple Object State Change Through Actions: Parnian Zameni,

Yuhan Shen,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Zameni_2025_ICCV, author = {Zameni, Parnian and Shen, Yuhan and Elhamifar, Ehsan}, title = {MOSCATO: Predicting Multiple Object State Change Through Actions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11600-11611} }
LoftUp: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models: Haiwen Huang,

Anpei Chen,

Volodymyr Havrylov,

Andreas Geiger,

Dan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV, author = {Huang, Haiwen and Chen, Anpei and Havrylov, Volodymyr and Geiger, Andreas and Zhang, Dan}, title = {LoftUp: Learning a Coordinate-Based Feature Upsampler for Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {9913-9923} }
Scalable Image Tokenization with Index Backpropagation Quantization: Fengyuan Shi,

Zhuoyan Luo,

Yixiao Ge,

Yujiu Yang,

Ying Shan,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV, author = {Shi, Fengyuan and Luo, Zhuoyan and Ge, Yixiao and Yang, Yujiu and Shan, Ying and Wang, Limin}, title = {Scalable Image Tokenization with Index Backpropagation Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16037-16046} }
InsViE-1M: Effective Instruction-based Video Editing with Elaborate Dataset Construction: Yuhui Wu,

Liyi Chen,

Ruibin Li,

Shihao Wang,

Chenxi Xie,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Yuhui and Chen, Liyi and Li, Ruibin and Wang, Shihao and Xie, Chenxi and Zhang, Lei}, title = {InsViE-1M: Effective Instruction-based Video Editing with Elaborate Dataset Construction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16692-16701} }
Morph: A Motion-free Physics Optimization Framework for Human Motion Generation: Zhuo Li,

Mingshuang Luo,

Ruibing Hou,

Xin Zhao,

Hao Liu,

Hong Chang,

Zimo Liu,

Chen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Zhuo and Luo, Mingshuang and Hou, Ruibing and Zhao, Xin and Liu, Hao and Chang, Hong and Liu, Zimo and Li, Chen}, title = {Morph: A Motion-free Physics Optimization Framework for Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {14580-14589} }
Precise Action-to-Video Generation Through Visual Action Prompts: Yuang Wang,

Chao Wen,

Haoyu Guo,

Sida Peng,

Minghan Qin,

Hujun Bao,

Xiaowei Zhou,

Ruizhen Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Yuang and Wen, Chao and Guo, Haoyu and Peng, Sida and Qin, Minghan and Bao, Hujun and Zhou, Xiaowei and Hu, Ruizhen}, title = {Precise Action-to-Video Generation Through Visual Action Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {12713-12724} }
FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors: Yabo Zhang,

Xinpeng Zhou,

Yihan Zeng,

Hang Xu,

Hui Li,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yabo and Zhou, Xinpeng and Zeng, Yihan and Xu, Hang and Li, Hui and Zuo, Wangmeng}, title = {FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18121-18131} }
LMM4LMM: Benchmarking and Evaluating Large-multimodal Image Generation with LMMs: Jiarui Wang,

Huiyu Duan,

Yu Zhao,

Juntong Wang,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Jiarui and Duan, Huiyu and Zhao, Yu and Wang, Juntong and Zhai, Guangtao and Min, Xiongkuo}, title = {LMM4LMM: Benchmarking and Evaluating Large-multimodal Image Generation with LMMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17312-17323} }
GaussianSpeech: Audio-Driven Personalized 3D Gaussian Avatars: Shivangi Aneja,

Artem Sevastopolsky,

Tobias Kirschstein,

Justus Thies,

Angela Dai,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Aneja_2025_ICCV, author = {Aneja, Shivangi and Sevastopolsky, Artem and Kirschstein, Tobias and Thies, Justus and Dai, Angela and Nie{\ss}ner, Matthias}, title = {GaussianSpeech: Audio-Driven Personalized 3D Gaussian Avatars}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13065-13075} }
Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets: Dale Decatur,

Thibault Groueix,

Wang Yifan,

Rana Hanocka,

Vladimir Kim,

Matheus Gadelha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Decatur_2025_ICCV, author = {Decatur, Dale and Groueix, Thibault and Yifan, Wang and Hanocka, Rana and Kim, Vladimir and Gadelha, Matheus}, title = {Reusing Computation in Text-to-Image Diffusion for Efficient Generation of Image Sets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16482-16491} }
Tree-NeRV: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids: Jiancheng Zhao,

Yifan Zhan,

Qingtian Zhu,

Mingze Ma,

Muyao Niu,

Zunian Wan,

Xiang Ji,

Yinqiang Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiancheng and Zhan, Yifan and Zhu, Qingtian and Ma, Mingze and Niu, Muyao and Wan, Zunian and Ji, Xiang and Zheng, Yinqiang}, title = {Tree-NeRV: Efficient Non-Uniform Sampling for Neural Video Representation via Tree-Structured Feature Grids}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15076-15085} }
Scalable Ranked Preference Optimization for Text-to-Image Generation: Shyamgopal Karthik,

Huseyin Coskun,

Zeynep Akata,

Sergey Tulyakov,

Jian Ren,

Anil Kag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karthik_2025_ICCV, author = {Karthik, Shyamgopal and Coskun, Huseyin and Akata, Zeynep and Tulyakov, Sergey and Ren, Jian and Kag, Anil}, title = {Scalable Ranked Preference Optimization for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18399-18410} }
QR-LoRA: Efficient and Disentangled Fine-tuning via QR Decomposition for Customized Generation: Jiahui Yang,

Yongjia Ma,

Donglin Di,

Jianxun Cui,

Hao Li,

Wei Chen,

Yan Xie,

Xun Yang,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV, author = {Yang, Jiahui and Ma, Yongjia and Di, Donglin and Cui, Jianxun and Li, Hao and Chen, Wei and Xie, Yan and Yang, Xun and Zuo, Wangmeng}, title = {QR-LoRA: Efficient and Disentangled Fine-tuning via QR Decomposition for Customized Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17587-17597} }
GestureLSM: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling: Pinxin Liu,

Luchuan Song,

Junhua Huang,

Haiyang Liu,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Pinxin and Song, Luchuan and Huang, Junhua and Liu, Haiyang and Xu, Chenliang}, title = {GestureLSM: Latent Shortcut based Co-Speech Gesture Generation with Spatial-Temporal Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {10929-10939} }
WAVE: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image: Jiwoo Park,

Tae Eun Choi,

Youngjun Jun,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV, author = {Park, Jiwoo and Choi, Tae Eun and Jun, Youngjun and Hwang, Seong Jae}, title = {WAVE: Warp-Based View Guidance for Consistent Novel View Synthesis Using a Single Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11906-11915} }
Blended Point Cloud Diffusion for Localized Text-guided Shape Editing: Etai Sella,

Noam Atia,

Ron Mokady,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sella_2025_ICCV, author = {Sella, Etai and Atia, Noam and Mokady, Ron and Averbuch-Elor, Hadar}, title = {Blended Point Cloud Diffusion for Localized Text-guided Shape Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19119-19129} }
AnimateAnyMesh: A Feed-Forward 4D Foundation Model for Text-Driven Universal Mesh Animation: Zijie Wu,

Chaohui Yu,

Fan Wang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Zijie and Yu, Chaohui and Wang, Fan and Bai, Xiang}, title = {AnimateAnyMesh: A Feed-Forward 4D Foundation Model for Text-Driven Universal Mesh Animation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13557-13568} }
Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis: Zhuokun Chen,

Jugang Fan,

Zhuowei Yu,

Bohan Zhuang,

Mingkui Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Zhuokun and Fan, Jugang and Yu, Zhuowei and Zhuang, Bohan and Tan, Mingkui}, title = {Frequency-Aware Autoregressive Modeling for Efficient High-Resolution Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {17140-17149} }
Efficient Track Anything: Yunyang Xiong,

Chong Zhou,

Xiaoyu Xiang,

Lemeng Wu,

Chenchen Zhu,

Zechun Liu,

Saksham Suri,

Balakrishnan Varadarajan,

Ramya Akula,

Forrest Iandola,

Raghuraman Krishnamoorthi,

Bilge Soran,

Vikas Chandra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_ICCV, author = {Xiong, Yunyang and Zhou, Chong and Xiang, Xiaoyu and Wu, Lemeng and Zhu, Chenchen and Liu, Zechun and Suri, Saksham and Varadarajan, Balakrishnan and Akula, Ramya and Iandola, Forrest and Krishnamoorthi, Raghuraman and Soran, Bilge and Chandra, Vikas}, title = {Efficient Track Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11513-11524} }
CameraCtrl II: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models: Hao He,

Ceyuan Yang,

Shanchuan Lin,

Yinghao Xu,

Meng Wei,

Liangke Gui,

Qi Zhao,

Gordon Wetzstein,

Lu Jiang,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV, author = {He, Hao and Yang, Ceyuan and Lin, Shanchuan and Xu, Yinghao and Wei, Meng and Gui, Liangke and Zhao, Qi and Wetzstein, Gordon and Jiang, Lu and Li, Hongsheng}, title = {CameraCtrl II: Dynamic Scene Exploration via Camera-controlled Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {13416-13426} }
PLADIS: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity: Kwanyoung Kim,

Byeongsu Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Kwanyoung and Sim, Byeongsu}, title = {PLADIS: Pushing the Limits of Attention in Diffusion Models at Inference Time by Leveraging Sparsity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {16238-16248} }
Robust Test-Time Adaptation for Single Image Denoising Using Deep Gaussian Prior: Qing Ma,

Pengwei Liang,

Xiong Zhou,

Jiayi Ma,

Junjun Jiang,

Zhe Peng; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV, author = {Ma, Qing and Liang, Pengwei and Zhou, Xiong and Ma, Jiayi and Jiang, Junjun and Peng, Zhe}, title = {Robust Test-Time Adaptation for Single Image Denoising Using Deep Gaussian Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11230-11240} }
Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification: Guibao Shen,

Luozhou Wang,

Jiantao Lin,

Wenhang Ge,

Chaozhe Zhang,

Xin Tao,

Di Zhang,

Pengfei Wan,

Guangyong Chen,

Yijun Li,

Ying-cong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV, author = {Shen, Guibao and Wang, Luozhou and Lin, Jiantao and Ge, Wenhang and Zhang, Chaozhe and Tao, Xin and Zhang, Di and Wan, Pengfei and Chen, Guangyong and Li, Yijun and Chen, Ying-cong}, title = {Scene Graph Guided Generation: Enable Accurate Relations Generation in Text-to-Image Models via Textural Rectification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {15437-15446} }
Disentangled Clothed Avatar Generation with Layered Representation: Weitian Zhang,

Yichao Yan,

Sijing Wu,

Manwen Liao,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weitian and Yan, Yichao and Wu, Sijing and Liao, Manwen and Yang, Xiaokang}, title = {Disentangled Clothed Avatar Generation with Layered Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {11327-11338} }
Teleportraits: Training-Free People Insertion into Any Scene: Jialu Gao,

K J Joseph,

Fernando De La Torre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV, author = {Gao, Jialu and Joseph, K J and De La Torre, Fernando}, title = {Teleportraits: Training-Free People Insertion into Any Scene}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {18866-18875} }
Towards Explicit Exoskeleton for the Reconstruction of Complicated 3D Human Avatars: Yifan Zhan,

Qingtian Zhu,

Muyao Niu,

Mingze Ma,

Jiancheng Zhao,

Zhihang Zhong,

Xiao Sun,

Yu Qiao,

Yinqiang Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Yifan and Zhu, Qingtian and Niu, Muyao and Ma, Mingze and Zhao, Jiancheng and Zhong, Zhihang and Sun, Xiao and Qiao, Yu and Zheng, Yinqiang},
  title     = {Towards Explicit Exoskeleton for the Reconstruction of Complicated {3D} Human Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14259--14269}
}
Capturing head avatar with hand contacts from a monocular video: Haonan He,

Yufeng Zheng,

Jie Song; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Haonan and Zheng, Yufeng and Song, Jie},
  title     = {Capturing head avatar with hand contacts from a monocular video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13099--13108}
}
Event-guided HDR Reconstruction with Diffusion Priors: Yixin Yang,

Jiawei Zhang,

Yang Zhang,

Yunxuan Wei,

Dongqing Zou,

Jimmy S. Ren,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Yixin and Zhang, Jiawei and Zhang, Yang and Wei, Yunxuan and Zou, Dongqing and Ren, Jimmy S. and Shi, Boxin},
  title     = {Event-guided {HDR} Reconstruction with Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11787--11796}
}
MoMaps: Semantics-Aware Scene Motion Generation with Motion Maps: Jiahui Lei,

Kyle Genova,

George Kopanas,

Noah Snavely,

Leonidas Guibas; [pdf] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Jiahui and Genova, Kyle and Kopanas, George and Snavely, Noah and Guibas, Leonidas},
  title     = {{MoMaps}: Semantics-Aware Scene Motion Generation with Motion Maps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10022--10031}
}
WarpHE4D: Dense 4D Head Map toward Full Head Reconstruction: Jongseob Yun,

Yong-Hoon Kwon,

Min-Gyu Park,

Ju-Mi Kang,

Min-Ho Lee,

Inho Chang,

Ju Hong Yoon,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Yun_2025_ICCV,
  author    = {Yun, Jongseob and Kwon, Yong-Hoon and Park, Min-Gyu and Kang, Ju-Mi and Lee, Min-Ho and Chang, Inho and Yoon, Ju Hong and Yoon, Kuk-Jin},
  title     = {{WarpHE4D}: Dense {4D} Head Map toward Full Head Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {11480--11490}
}
Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models: Hongyang Wei,

Shuaizheng Liu,

Chun Yuan,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Hongyang and Liu, Shuaizheng and Yuan, Chun and Zhang, Lei},
  title     = {Perceive, Understand and Restore: Real-World Image Super-Resolution with Autoregressive Multimodal Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18640--18650}
}
TRKT: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring: Zhu Xu,

Ting Lei,

Zhimin Li,

Guan Wang,

Qingchao Chen,

Yuxin Peng,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhu and Lei, Ting and Li, Zhimin and Wang, Guan and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {{TRKT}: Weakly Supervised Dynamic Scene Graph Generation with Temporal-enhanced Relation-aware Knowledge Transferring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15812--15821}
}
TARO: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis: Tri Ton,

Ji Woo Hong,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ton_2025_ICCV,
  author    = {Ton, Tri and Hong, Ji Woo and Yoo, Chang D.},
  title     = {{TARO}: Timestep-Adaptive Representation Alignment with Onset-Aware Conditioning for Synchronized Video-to-Audio Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {14228--14237}
}
ModSkill: Physical Character Skill Modularization: Yiming Huang,

Zhiyang Dou,

Lingjie Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yiming and Dou, Zhiyang and Liu, Lingjie},
  title     = {{ModSkill}: Physical Character Skill Modularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12394--12404}
}
CAFA: a Controllable Automatic Foley Artist: Roi Benita,

Michael Finkelson,

Tavi Halperin,

Gleb Sterkin,

Yossi Adi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benita_2025_ICCV,
  author    = {Benita, Roi and Finkelson, Michael and Halperin, Tavi and Sterkin, Gleb and Adi, Yossi},
  title     = {{CAFA}: a Controllable Automatic {Foley} Artist},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15917--15926}
}
TF-TI2I: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models: Teng-Fang Hsiao,

Bo-Kai Ruan,

Yi-Lun Wu,

Tzu-Ling Lin,

Hong-Han Shuai; [pdf] [supp]
[bibtex]
@InProceedings{Hsiao_2025_ICCV,
  author    = {Hsiao, Teng-Fang and Ruan, Bo-Kai and Wu, Yi-Lun and Lin, Tzu-Ling and Shuai, Hong-Han},
  title     = {{TF-TI2I}: Training-Free Text-and-Image-to-Image Generation via Multi-Modal Implicit-Context Learning In Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {18377--18387}
}
CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction: Zhefei Gong,

Pengxiang Ding,

Shangke Lyu,

Siteng Huang,

Mingyang Sun,

Wei Zhao,

Zhaoxin Fan,

Donglin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Zhefei and Ding, Pengxiang and Lyu, Shangke and Huang, Siteng and Sun, Mingyang and Zhao, Wei and Fan, Zhaoxin and Wang, Donglin},
  title     = {{CARP}: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13460--13470}
}
MagicMotion: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance: Quanhao Li,

Zhen Xing,

Rui Wang,

Hui Zhang,

Qi Dai,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Quanhao and Xing, Zhen and Wang, Rui and Zhang, Hui and Dai, Qi and Wu, Zuxuan},
  title     = {{MagicMotion}: Controllable Video Generation with Dense-to-Sparse Trajectory Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12112--12123}
}
Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning: Zhi-Wei Xia,

Kun-Yu Lin,

Yuan-Ming Li,

Wei-Jin Huang,

Xian-Tuo Tan,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Zhi-Wei and Lin, Kun-Yu and Li, Yuan-Ming and Huang, Wei-Jin and Tan, Xian-Tuo and Zheng, Wei-Shi},
  title     = {Less Static, More Private: Towards Transferable Privacy-Preserving Action Recognition by Generative Decoupled Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12894--12903}
}
VISION-XL: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models: Taesung Kwon,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Kwon_2025_ICCV,
  author    = {Kwon, Taesung and Ye, Jong Chul},
  title     = {{VISION-XL}: High Definition Video Inverse Problem Solver using Latent Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10465--10474}
}
Mixture of Experts Guided by Gaussian Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection: Giacomo D' Amicantonio,

Snehashis Majhi,

Quan Kong,

Lorenzo Garattoni,

Gianpiero Francesca,

Francois Bremond,

Egor Bondarev; [pdf] [supp]
[bibtex]
@InProceedings{Amicantonio_2025_ICCV,
  author    = {D'Amicantonio, Giacomo and Majhi, Snehashis and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bremond, Francois and Bondarev, Egor},
  title     = {Mixture of Experts Guided by {Gaussian} Splatters Matters: A new Approach to Weakly-Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10275--10285}
}
Att-Adapter: A Robust and Precise Domain-Specific Multi-Attributes T2I Diffusion Adapter via Conditional Variational Autoencoder: Wonwoong Cho,

Yan-Ying Chen,

Matthew Klenk,

David I. Inouye,

Yanxia Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_ICCV,
  author    = {Cho, Wonwoong and Chen, Yan-Ying and Klenk, Matthew and Inouye, David I. and Zhang, Yanxia},
  title     = {{Att-Adapter}: A Robust and Precise Domain-Specific Multi-Attributes {T2I} Diffusion Adapter via Conditional Variational Autoencoder},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {15626--15635}
}
PERSONA: Personalized Whole-Body 3D Avatar with Pose-Driven Deformations from a Single Image: Geonhee Sim,

Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sim_2025_ICCV,
  author    = {Sim, Geonhee and Moon, Gyeongsik},
  title     = {{PERSONA}: Personalized Whole-Body {3D} Avatar with Pose-Driven Deformations from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {12670--12680}
}
Blind Video Super-Resolution based on Implicit Kernels: Qiang Zhu,

Yuxuan Jiang,

Shuyuan Zhu,

Fan Zhang,

David Bull,

Bing Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qiang and Jiang, Yuxuan and Zhu, Shuyuan and Zhang, Fan and Bull, David and Zeng, Bing},
  title     = {Blind Video Super-Resolution based on Implicit Kernels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10971--10981}
}
AMDANet: Attention-Driven Multi-Perspective Discrepancy Alignment for RGB-Infrared Image Fusion and Segmentation: Haifeng Zhong,

Fan Tang,

Zhuo Chen,

Hyung Jin Chang,

Yixing Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Haifeng and Tang, Fan and Chen, Zhuo and Chang, Hyung Jin and Gao, Yixing},
  title     = {{AMDANet}: Attention-Driven Multi-Perspective Discrepancy Alignment for {RGB}-Infrared Image Fusion and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10645--10655}
}
Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer: Md Ashiqur Rahman,

Chiao-An Yang,

Michael N. Cheng,

Lim Jun Hao,

Jeremiah Jiang,

Teck-Yian Lim,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2025_ICCV,
  author    = {Rahman, Md Ashiqur and Yang, Chiao-An and Cheng, Michael N. and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.},
  title     = {Local Scale Equivariance with Latent Deep Equilibrium Canonicalizer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10527--10537}
}
TACO: Taming Diffusion for in-the-wild Video Amodal Completion: Ruijie Lu,

Yixin Chen,

Yu Liu,

Jiaxiang Tang,

Junfeng Ni,

Diwen Wan,

Gang Zeng,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Ruijie and Chen, Yixin and Liu, Yu and Tang, Jiaxiang and Ni, Junfeng and Wan, Diwen and Zeng, Gang and Huang, Siyuan},
  title     = {{TACO}: Taming Diffusion for in-the-wild Video Amodal Completion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13638--13650}
}
Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos: Yu'ang Feng,

Shuyong Gao,

Fuzhen Yan,

Yicheng Song,

Lingyi Hong,

Junjie Hu,

Wenqiang Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yu'ang and Gao, Shuyong and Yan, Fuzhen and Song, Yicheng and Hong, Lingyi and Hu, Junjie and Zhang, Wenqiang},
  title     = {Scoring, Remember, and Reference: Catching Camouflaged Objects in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {13043--13052}
}
Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers: An-Lun Liu,

Yu-Wei Chao,

Yi-Ting Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, An-Lun and Chao, Yu-Wei and Chen, Yi-Ting},
  title     = {Task-Oriented Human Grasp Synthesis via Context- and Task-Aware Diffusers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {10375--10385}
}; Back