Cognitive Foundations for Multimodal Models
Vision-Language Pretraining with Structured Distractor Augmentation-
[pdf]
[bibtex]@InProceedings{Yadla_2026_CVPR,
  author    = {Yadla, Prasanth},
  title     = {Vision-Language Pretraining with Structured Distractor Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11095--11103},
}
Can Vision-Language Models Count? A Synthetic Benchmark and Analysis of Attention-Based Interventions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sengupta_2026_CVPR,
  author    = {Sengupta, Saurav and Moradinasab, Nazanin and Liu, Jiebei and Brown, Donald E.},
  title     = {Can Vision-Language Models Count? A Synthetic Benchmark and Analysis of Attention-Based Interventions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11076--11084},
}
Latent-Stability Gated SAM: Detecting Hallucinated Segmentations under Domain Shift-
[pdf]
[bibtex]@InProceedings{Imran_2026_CVPR,
  author    = {Imran, Muhammad and Lee, Yugyung},
  title     = {Latent-Stability Gated {SAM}: Detecting Hallucinated Segmentations under Domain Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11068--11075},
}
Multimodal Graph-of-Thoughts: Hypothesis-Verification Graphs for Multimodal Reasoning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Belyaeva_2026_CVPR,
  author    = {Belyaeva, Irina},
  title     = {Multimodal Graph-of-Thoughts: Hypothesis-Verification Graphs for Multimodal Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11048--11057},
}
Action Without Interaction: Probing the Physical Foundations of Video LMMs via Contact-Release Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Harari_2026_CVPR,
  author    = {Harari, Daniel and Sidorov, Michael and Shterental, Chen and David, Liel and Gebreselasie, Abrham Kahsay and Khan, Muhammad Haris},
  title     = {Action Without Interaction: Probing the Physical Foundations of Video {LMMs} via Contact-Release Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11058--11067},
}
MEBench: A Novel Benchmark for Understanding Mutual Exclusivity Bias in Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Anh and Stojanov, Stefan and Huang, Zixuan and Boote, Bikram and Rehg, James M.},
  title     = {{MEBench}: A Novel Benchmark for Understanding Mutual Exclusivity Bias in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11085--11094},
}

