Multimodal Algorithmic Reasoning Workshop


Focus Ambiguity in Visual Questions: A Disambiguation Problem, Not Instance Segmentation
Yu-Yun Tseng,
Danna Gurari
[pdf] [supp]
[bibtex]
@InProceedings{Tseng_2026_CVPR,
  author    = {Tseng, Yu-Yun and Gurari, Danna},
  title     = {Focus Ambiguity in Visual Questions: A Disambiguation Problem, Not Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11507--11515},
}

SPR-128K: A New Benchmark for Spatial Plausibility Reasoning with Multimodal Large Language Models
Zhiyuan Hu,
Zheng Sun,
Yi Wei,
Long Yu
[pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zhiyuan and Sun, Zheng and Wei, Yi and Yu, Long},
  title     = {{SPR-128K}: A New Benchmark for Spatial Plausibility Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11543--11552},
}

When Negation Is a Geometry Problem in Vision Language Models
Fawaz Sammani,
Tzoulio Chamiti,
Paul Gavrikov,
Nikos Deligiannis
[pdf] [arXiv]
[bibtex]
@InProceedings{Sammani_2026_CVPR,
  author    = {Sammani, Fawaz and Chamiti, Tzoulio and Gavrikov, Paul and Deligiannis, Nikos},
  title     = {When Negation Is a Geometry Problem in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11553--11562},
}

POVQA: Preference-Optimized Video Question Answering with Rationales for Data Efficiency
Ashim Dahal,
Ankit Ghimire,
Saydul Akbar Murad,
Nick Rahimi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dahal_2026_CVPR,
  author    = {Dahal, Ashim and Ghimire, Ankit and Murad, Saydul Akbar and Rahimi, Nick},
  title     = {{POVQA}: Preference-Optimized Video Question Answering with Rationales for Data Efficiency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11533--11542},
}

On Robustness and Chain-of-Thought Consistency of RL-Finetuned VLMs
Rosie Zhao,
Anshul Shah,
Xiaoyu Zhu,
Xinke Deng,
Zhongyu Jiang,
Yang Yang,
Joerg Liebelt,
Arnab Mondal
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Rosie and Shah, Anshul and Zhu, Xiaoyu and Deng, Xinke and Jiang, Zhongyu and Yang, Yang and Liebelt, Joerg and Mondal, Arnab},
  title     = {On Robustness and Chain-of-Thought Consistency of {RL}-Finetuned {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11523--11532},
}

HoliGround: Holistic Assessment for Grounded Chain-of-Thought
Tom Hodemon,
Mohamed Chaouch,
Aboubacar Tuo,
Angelique Loesch
[pdf]
[bibtex]
@InProceedings{Hodemon_2026_CVPR,
  author    = {Hodemon, Tom and Chaouch, Mohamed and Tuo, Aboubacar and Loesch, Angelique},
  title     = {{HoliGround}: Holistic Assessment for Grounded Chain-of-Thought},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11516--11522},
}