Multimodal Reasoning and Slow Thinking in Large Model Era: Towards System 2 and Beyond


Multimodal Dual-domain Learning for Image Fusion
Heng Wang,
Mingxin Jin,
Cong Wang,
Yuan Yuan
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Heng and Jin, Mingxin and Wang, Cong and Yuan, Yuan},
  title     = {Multimodal Dual-domain Learning for Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6486--6495},
}

Multimodal Monocular Geometric Clues-Informed Multi-View Stereo Network
Wanjuan Su,
Shili Xiong
[pdf]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Wanjuan and Xiong, Shili},
  title     = {Multimodal Monocular Geometric Clues-Informed Multi-View Stereo Network},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6496--6505},
}

How Well Do Vision-Language Models Understand Cities? A Comparative Study on Spatial Reasoning from Street-View Images
Juneyoung Ro,
Namwoo Kim,
Yoonjin Yoon
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ro_2025_ICCV,
  author    = {Ro, Juneyoung and Kim, Namwoo and Yoon, Yoonjin},
  title     = {How Well Do Vision-Language Models Understand Cities? {A} Comparative Study on Spatial Reasoning from Street-View Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6476--6485},
}

Test-time Prompt Refinement for Text-to-Image Models
Mohammed Abdul Hafeez Khan,
Yash Jain,
Siddhartha Bhattacharyya,
Vibhav Vineet
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2025_ICCV,
  author    = {Khan, Mohammed Abdul Hafeez and Jain, Yash and Bhattacharyya, Siddhartha and Vineet, Vibhav},
  title     = {Test-time Prompt Refinement for Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6506--6516},
}

MARS2 2025 Challenge on Multimodal Reasoning: Datasets, Methods, Results, Discussion, and Outlook
Peng Xu,
Shengwu Xiong,
Jiajun Zhang,
Yaxiong Chen,
Bowen Zhou,
Chen Change Loy,
David Clifton,
Kyoung Mu Lee,
Luc Van Gool,
Ruiming He,
Ruilin Yao,
Xinwei Long,
Jirui Huang,
Kai Tian,
Sa Yang,
Yihua Shao,
Jin Feng,
Yue Zhong,
Jiakai Zhou,
Cheng Tang,
Tianyu Zou,
Yifang Zhang,
Junming Liang,
Guoyou Li,
Zhaoxiang Wang,
Qiang Zhou,
Yichen Zhao,
Shili Xiong,
Hyeongjin Nam,
Jaerin Lee,
Jaeyoung Chung,
JoonKyu Park,
Junghun Oh,
Kanggeon Lee,
Wooseok Lee,
Juneyoung Ro,
Turghun Osman,
Can Hu,
Chaoyang Liao,
Cheng Chen,
Chengcheng Han,
Chenhao Qiu,
Chong Peng,
Cong Xu,
Dailin Li,
Feiyu Wang,
Feng Gao,
Guibo Zhu,
Guopeng Tang,
Haibo Lu,
Han Fang,
Han Qi,
Hanxiao Wu,
Haobo Cheng,
Hongbo Sun,
Hongyao Chen,
Huayong Hu,
Hui Li,
Jiaheng Ma,
Jiang Yu,
Jianing Wang,
Jie Yang,
Jing He,
Jinglin Zhou,
Jingxuan Li,
Josef Kittler,
Lihao Zheng,
Linnan Zhao,
Mengxi Jia,
Muyang Yan,
Thanh Thien Nguyen,
Pu Luo,
Qi Li,
Shien Song,
Shijie Dong,
Shuai Shao,
Shutao Li,
Taofeng Xue,
Tianyang Xu,
Tianyi Gao,
Tingting Li,
Wei Zhang,
Weiyang Su,
Xiaodong Dong,
Xiao-Jun Wu,
Xiaopeng Zhou,
Xin Chen,
Xin Wei,
Xinyi You,
Xudong Kang,
Xujie Zhou,
Xusheng Liu,
Yanan Wang,
Yanbin Huang,
Yang Liu,
Yang Yang,
Yanglin Deng,
Yashu Kang,
Ye Yuan,
Yi Wen,
Yicen Tian,
Yilin Tao,
Yin Tang,
Yipeng Lin,
Yiqing Wang,
Yiting Xi,
Yongkang Yu,
Yumei Li,
Yuxin Qin,
Yuying Chen,
Yuzhe Cen,
Zhaofan Zou,
Zhaohong Liu,
Zhehao Shen,
Zhenglin Du,
Zhengyang Li,
Zhenni Huang,
Zhenwei Shao,
Zhilong Song,
Zhiyong Feng,
Zhiyu Wang,
Zhou Yu,
Ziang Li,
Zihan Zhai,
Zijian Zhang,
Ziyang Peng,
Ziyun Xiao,
Zongshu Li
[pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Peng and Xiong, Shengwu and Zhang, Jiajun and Chen, Yaxiong and
               Zhou, Bowen and Loy, Chen Change and Clifton, David and Lee, Kyoung Mu and
               Van Gool, Luc and He, Ruiming and Yao, Ruilin and Long, Xinwei and
               Huang, Jirui and Tian, Kai and Yang, Sa and Shao, Yihua and
               Feng, Jin and Zhong, Yue and Zhou, Jiakai and Tang, Cheng and
               Zou, Tianyu and Zhang, Yifang and Liang, Junming and Li, Guoyou and
               Wang, Zhaoxiang and Zhou, Qiang and Zhao, Yichen and Xiong, Shili and
               Nam, Hyeongjin and Lee, Jaerin and Chung, Jaeyoung and Park, JoonKyu and
               Oh, Junghun and Lee, Kanggeon and Lee, Wooseok and Ro, Juneyoung and
               Osman, Turghun and Hu, Can and Liao, Chaoyang and Chen, Cheng and
               Han, Chengcheng and Qiu, Chenhao and Peng, Chong and Xu, Cong and
               Li, Dailin and Wang, Feiyu and Gao, Feng and Zhu, Guibo and
               Tang, Guopeng and Lu, Haibo and Fang, Han and Qi, Han and
               Wu, Hanxiao and Cheng, Haobo and Sun, Hongbo and Chen, Hongyao and
               Hu, Huayong and Li, Hui and Ma, Jiaheng and Yu, Jiang and
               Wang, Jianing and Yang, Jie and He, Jing and Zhou, Jinglin and
               Li, Jingxuan and Kittler, Josef and Zheng, Lihao and Zhao, Linnan and
               Jia, Mengxi and Yan, Muyang and Nguyen, Thanh Thien and Luo, Pu and
               Li, Qi and Song, Shien and Dong, Shijie and Shao, Shuai and
               Li, Shutao and Xue, Taofeng and Xu, Tianyang and Gao, Tianyi and
               Li, Tingting and Zhang, Wei and Su, Weiyang and Dong, Xiaodong and
               Wu, Xiao-Jun and Zhou, Xiaopeng and Chen, Xin and Wei, Xin and
               You, Xinyi and Kang, Xudong and Zhou, Xujie and Liu, Xusheng and
               Wang, Yanan and Huang, Yanbin and Liu, Yang and Yang, Yang and
               Deng, Yanglin and Kang, Yashu and Yuan, Ye and Wen, Yi and
               Tian, Yicen and Tao, Yilin and Tang, Yin and Lin, Yipeng and
               Wang, Yiqing and Xi, Yiting and Yu, Yongkang and Li, Yumei and
               Qin, Yuxin and Chen, Yuying and Cen, Yuzhe and Zou, Zhaofan and
               Liu, Zhaohong and Shen, Zhehao and Du, Zhenglin and Li, Zhengyang and
               Huang, Zhenni and Shao, Zhenwei and Song, Zhilong and Feng, Zhiyong and
               Wang, Zhiyu and Yu, Zhou and Li, Ziang and Zhai, Zihan and
               Zhang, Zijian and Peng, Ziyang and Xiao, Ziyun and Li, Zongshu},
  title     = {{MARS2} 2025 Challenge on Multimodal Reasoning: Datasets, Methods, Results, Discussion, and Outlook},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {6517--6546},
}