CVPR 2026 Open Access Repository

Cognitive Foundations for Multimodal Models

Vision-Language Pretraining with Structured Distractor Augmentation: Prasanth Yadla; [pdf]
[bibtex]
@InProceedings{Yadla_2026_CVPR,
  author    = {Yadla, Prasanth},
  title     = {Vision-Language Pretraining with Structured Distractor Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11095--11103}
}
Can Vision-Language Models Count? A Synthetic Benchmark and Analysis of Attention-Based Interventions: Saurav Sengupta,

Nazanin Moradinasab,

Jiebei Liu,

Donald E Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sengupta_2026_CVPR,
  author    = {Sengupta, Saurav and Moradinasab, Nazanin and Liu, Jiebei and Brown, Donald E.},
  title     = {Can Vision-Language Models Count? A Synthetic Benchmark and Analysis of Attention-Based Interventions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11076--11084}
}
Latent-Stability Gated SAM: Detecting Hallucinated Segmentations under Domain Shift: Muhammad Imran,

Yugyung Lee; [pdf]
[bibtex]
@InProceedings{Imran_2026_CVPR,
  author    = {Imran, Muhammad and Lee, Yugyung},
  title     = {Latent-Stability Gated {SAM}: Detecting Hallucinated Segmentations under Domain Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11068--11075}
}
Multimodal Graph-of-Thoughts: Hypothesis-Verification Graphs for Multimodal Reasoning in Vision-Language Models: Irina Belyaeva; [pdf] [supp]
[bibtex]
@InProceedings{Belyaeva_2026_CVPR,
  author    = {Belyaeva, Irina},
  title     = {Multimodal Graph-of-Thoughts: Hypothesis-Verification Graphs for Multimodal Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11048--11057}
}
Action Without Interaction: Probing the Physical Foundations of Video LMMs via Contact-Release Detection: Daniel Harari,

Michael Sidorov,

Chen Shterental,

Liel David,

Abrham Kahsay Gebreselasie,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Harari_2026_CVPR,
  author    = {Harari, Daniel and Sidorov, Michael and Shterental, Chen and David, Liel and Gebreselasie, Abrham Kahsay and Khan, Muhammad Haris},
  title     = {Action Without Interaction: Probing the Physical Foundations of Video {LMMs} via Contact-Release Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11058--11067}
}
MEBench: A Novel Benchmark for Understanding Mutual Exclusivity Bias in Vision-Language Models: Anh Thai,

Stefan Stojanov,

Zixuan Huang,

Bikram Boote,

James M. Rehg; [pdf] [arXiv]
[bibtex]
@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Anh and Stojanov, Stefan and Huang, Zixuan and Boote, Bikram and Rehg, James M.},
  title     = {{MEBench}: A Novel Benchmark for Understanding Mutual Exclusivity Bias in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11085--11094}
}