Human-Interactive Generation and Editing


MFT-VITON: High-Fidelity Virtual Try-On with Minimal Input via a Mask-Free Transformer-Diffusion Model
Zhenchen Wan,
Yanwu Xu,
Dongting Hu,
Weilun Cheng,
Tianxi Chen,
Zhaoqing Wang,
Feng Liu,
Tongliang Liu,
Mingming Gong
[pdf]
[bibtex]
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Zhenchen and Xu, Yanwu and Hu, Dongting and Cheng, Weilun and Chen, Tianxi and Wang, Zhaoqing and Liu, Feng and Liu, Tongliang and Gong, Mingming},
  title     = {{MFT-VITON}: High-Fidelity Virtual Try-On with Minimal Input via a Mask-Free Transformer-Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1985--1994},
}

Beyond Flat Text: Dual Self-inherited Guidance for Visual Text Generation
Minxing Luo,
Zixun Xia,
Liaojun Chen,
Zhenhang Li,
Weichao Zeng,
Jianye Wang,
Wentao Cheng,
Yaxing Wang,
Yu Zhou,
Jian Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Minxing and Xia, Zixun and Chen, Liaojun and Li, Zhenhang and Zeng, Weichao and Wang, Jianye and Cheng, Wentao and Wang, Yaxing and Zhou, Yu and Yang, Jian},
  title     = {Beyond Flat Text: Dual Self-inherited Guidance for Visual Text Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1916--1925},
}

GenEscape: Hierarchical Multi-Agent Generation of Escape Room Puzzles
Mengyi Shan,
Brian Curless,
Ira Kemelmacher-Shlizerman,
Steve Seitz
[pdf] [arXiv]
[bibtex]
@InProceedings{Shan_2025_ICCV,
  author    = {Shan, Mengyi and Curless, Brian and Kemelmacher-Shlizerman, Ira and Seitz, Steve},
  title     = {{GenEscape}: Hierarchical Multi-Agent Generation of Escape Room Puzzles},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2005--2013},
}

DreamHOI: Subject-Driven Generation of 3D Human-Object Interactions with Diffusion Priors
Hanwen Zhu,
Ruining Li,
Tomas Jakab
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Hanwen and Li, Ruining and Jakab, Tomas},
  title     = {{DreamHOI}: Subject-Driven Generation of {3D} Human-Object Interactions with Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1895--1905},
}

Axes-and-Tags: LLM-Driven Design Galleries for Generative Content
Asanshay Gupta,
Vishnu Sarukkai,
Kayvon Fatahalian
[pdf] [supp]
[bibtex]
@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Asanshay and Sarukkai, Vishnu and Fatahalian, Kayvon},
  title     = {{Axes-and-Tags}: {LLM}-Driven Design Galleries for Generative Content},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1975--1984},
}

Controllable Pedestrian Video Editing for Multi-View Driving Scenarios via Motion Sequence
Danzhen Fu,
Jiagao Hu,
Daiguo Zhou,
Fei Wang,
Zepeng Wang,
Wenhua Liao
[pdf] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Danzhen and Hu, Jiagao and Zhou, Daiguo and Wang, Fei and Wang, Zepeng and Liao, Wenhua},
  title     = {Controllable Pedestrian Video Editing for Multi-View Driving Scenarios via Motion Sequence},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1840--1849},
}

Concat-ID: Towards Universal Identity-Preserving Video Synthesis
Yong Zhong,
Zhuoyi Yang,
Jiayan Teng,
Xiaotao Gu,
Chongxuan Li
[pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yong and Yang, Zhuoyi and Teng, Jiayan and Gu, Xiaotao and Li, Chongxuan},
  title     = {{Concat-ID}: Towards Universal Identity-Preserving Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1906--1915},
}

Enhancing Identity Preservation in Portrait Generation via Reward Optimization
Yang Liu,
Hongyu Zang,
Chao Xu,
Baigui Sun,
Shan Luo
[pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Zang, Hongyu and Xu, Chao and Sun, Baigui and Luo, Shan},
  title     = {Enhancing Identity Preservation in Portrait Generation via Reward Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1995--2004},
}

DepthDance: Complex-pose Human Image Animation with Appearance-agnostic Depth Guidance
Yingjie Xi,
Zhengze Xu,
Zhao Wang,
Xiaosong Yang,
Jinsong Lan,
Jian Jun Zhang,
Mengting Chen
[pdf]
[bibtex]
@InProceedings{Xi_2025_ICCV,
  author    = {Xi, Yingjie and Xu, Zhengze and Wang, Zhao and Yang, Xiaosong and Lan, Jinsong and Zhang, Jian Jun and Chen, Mengting},
  title     = {{DepthDance}: Complex-pose Human Image Animation with Appearance-agnostic Depth Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1926--1936},
}

Sketch-to-Layout: Sketch-Guided Multimodal Layout Generation
Riccardo Brioschi,
Aleksandr Alekseev,
Emanuele Nevali,
Berkay Döner,
Omar El Malki,
Blagoj Mitrevski,
Leandro Kieliger,
Mark Collier,
Andrii Maksai,
Jesse Berent,
Claudiu Cristian Musat,
Efi Kokiopoulou
[pdf] [supp]
[bibtex]
@InProceedings{Brioschi_2025_ICCV,
  author    = {Brioschi, Riccardo and Alekseev, Aleksandr and Nevali, Emanuele and D{\"o}ner, Berkay and El Malki, Omar and Mitrevski, Blagoj and Kieliger, Leandro and Collier, Mark and Maksai, Andrii and Berent, Jesse and Musat, Claudiu Cristian and Kokiopoulou, Efi},
  title     = {{Sketch-to-Layout}: Sketch-Guided Multimodal Layout Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1872--1884},
}

StyleBooth: Image Style Editing with Multimodal Instruction
Zhen Han,
Chaojie Mao,
Zeyinzi Jiang,
Yulin Pan,
Jingfeng Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Zhen and Mao, Chaojie and Jiang, Zeyinzi and Pan, Yulin and Zhang, Jingfeng},
  title     = {{StyleBooth}: Image Style Editing with Multimodal Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1947--1957},
}

UniPaint: Unified Space-time Video Inpainting via Mixture-of-Experts
Zhen Wan,
Chenyang Qi,
Zhiheng Liu,
Tao Gui,
Yue Ma
[pdf] [supp] [arXiv]
[bibtex]
@comment{Key disambiguated with a _b suffix: the MFT-VITON entry earlier in this file already uses Wan_2025_ICCV, and a repeated key makes BibTeX drop the later entry.}
@InProceedings{Wan_2025_ICCV_b,
  author    = {Wan, Zhen and Qi, Chenyang and Liu, Zhiheng and Gui, Tao and Ma, Yue},
  title     = {{UniPaint}: Unified Space-time Video Inpainting via Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1861--1871},
}

Model as a Game: On Numerical and Spatial Consistency for Generative Games
Jingye Chen,
Yuzhong Zhao,
Yupan Huang,
Lei Cui,
Li Dong,
Tengchao Lv,
Qifeng Chen,
Furu Wei
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jingye and Zhao, Yuzhong and Huang, Yupan and Cui, Lei and Dong, Li and Lv, Tengchao and Chen, Qifeng and Wei, Furu},
  title     = {Model as a Game: On Numerical and Spatial Consistency for Generative Games},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1937--1946},
}

2D Instance Editing in 3D Space
Yuhuan Xie,
Aoxuan Pan,
Mingxian Lin,
Wei Huang,
Yi-Hua Huang,
Xiaojuan Qi
[pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Yuhuan and Pan, Aoxuan and Lin, Mingxian and Huang, Wei and Huang, Yi-Hua and Qi, Xiaojuan},
  title     = {{2D} Instance Editing in {3D} Space},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1967--1974},
}

SEED-Story: Multimodal Long Story Generation with Large Language Model
Shuai Yang,
Yuying Ge,
Yang Li,
Yukang Chen,
Yixiao Ge,
Ying Shan,
Ying-Cong Chen
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Shuai and Ge, Yuying and Li, Yang and Chen, Yukang and Ge, Yixiao and Shan, Ying and Chen, Ying-Cong},
  title     = {{SEED-Story}: Multimodal Long Story Generation with Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1850--1860},
}

Null text-guided interactive image editing for diffusion models
Jing Wang,
Hao Luo
[pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jing and Luo, Hao},
  title     = {Null text-guided interactive image editing for diffusion models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1885--1894},
}

ACE++: Instruction-Based Image Creation and Editing via Context-Aware Content Filling
Chaojie Mao,
Jingfeng Zhang,
Yulin Pan,
Zeyinzi Jiang,
Zhen Han,
Yu Liu,
Jingren Zhou
[pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Chaojie and Zhang, Jingfeng and Pan, Yulin and Jiang, Zeyinzi and Han, Zhen and Liu, Yu and Zhou, Jingren},
  title     = {{ACE++}: Instruction-Based Image Creation and Editing via Context-Aware Content Filling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {1958--1966},
}