CVPR 2026 Open Access Repository

DataMFM: Emerging Directions in Data for Multimodal Foundation Models

TimeCausality: Evaluating the Causal Ability in Time Dimension for Vision Language Models: Zeqing Wang,

Shiyuan Zhang,

Chengpei Tang,

Keze Wang; [pdf] [supp] [arXiv]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zeqing and Zhang, Shiyuan and Tang, Chengpei and Wang, Keze},
  title     = {{TimeCausality}: Evaluating the Causal Ability in Time Dimension for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4396--4406},
}
VLA-AD: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection: Asma Belhadi,

Youcef Djenouri,

Ahmed Nabil Belbachir; [pdf]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Belhadi_2026_CVPR,
  author    = {Belhadi, Asma and Djenouri, Youcef and Belbachir, Ahmed Nabil},
  title     = {{VLA-AD}: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331},
}
Scalable Parallel Prompting for Complex AV Video Captioning: April Yang,

Roberto Amoroso,

Nikita Durasov,

Devansh Bisla,

Sandipan Kundu,

Elmar Haussmann,

Ruchi Bhargava,

Maying Shen,

Nadine Chang,

Jose M. Alvarez; [pdf]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, April and Amoroso, Roberto and Durasov, Nikita and Bisla, Devansh and Kundu, Sandipan and Haussmann, Elmar and Bhargava, Ruchi and Shen, Maying and Chang, Nadine and Alvarez, Jose M.},
  title     = {Scalable Parallel Prompting for Complex {AV} Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4407--4416},
}
Longitudinal Multimodal Modeling for Alzheimer's Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion: Zeqing Li,

Linlin Gao,

Liming Dong,

Hao Huang; [pdf]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zeqing and Gao, Linlin and Dong, Liming and Huang, Hao},
  title     = {Longitudinal Multimodal Modeling for {Alzheimer's} Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4343--4350},
}
Adversarial Feedback from Segmentation Network to Siamese Diffusion for Improving Polyp Segmentation: Kairi Osaki,

Kazuhiro Hotta; [pdf]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Osaki_2026_CVPR,
  author    = {Osaki, Kairi and Hotta, Kazuhiro},
  title     = {Adversarial Feedback from Segmentation Network to {Siamese} Diffusion for Improving Polyp Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4351--4360},
}
Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation: Nisarg A. Shah,

Amir Ziai,

Chaitanya Ekanadham,

Vishal M. Patel; [pdf]
[bibtex]
% The first author's "A" is treated as a middle initial (surname Shah), so the name is
% stored as "Shah, Nisarg A."; the citation key is left unchanged so existing citations resolve.
% Braced words in booktitle keep their capitalization under sentence-casing styles.
@InProceedings{A_Shah_2026_CVPR,
  author    = {Shah, Nisarg A. and Ziai, Amir and Ekanadham, Chaitanya and Patel, Vishal M.},
  title     = {Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4376--4386},
}
Uncertainty-Guided Data Curation for 3D Object Detection: Nikita Durasov,

Rafid Mahmood,

Jiwoong Choi,

Marc T. Law,

James Lucas,

Pascal Fua,

Jose M. Alvarez; [pdf] [supp]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Durasov_2026_CVPR,
  author    = {Durasov, Nikita and Mahmood, Rafid and Choi, Jiwoong and Law, Marc T. and Lucas, James and Fua, Pascal and Alvarez, Jose M.},
  title     = {Uncertainty-Guided Data Curation for {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4332--4342},
}
Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis: Yian Wang,

Linlin Gao,

Zeqing Li; [pdf]
[bibtex]
% Key carries a "_b" suffix because Wang_2026_CVPR is already used by the TimeCausality
% entry earlier in this file; a repeated key makes this entry unreachable.
% Braced words in booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Wang_2026_CVPR_b,
  author    = {Wang, Yian and Gao, Linlin and Li, Zeqing},
  title     = {Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4387--4395},
}
VLM Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models: Ayan Sar,

Pranav Puri,

Anurag Kaushish,

Sampurna Roy,

Sumit Aich,

Tanupriya Choudhury,

Ajith Abraham; [pdf] [supp]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Sar_2026_CVPR,
  author    = {Sar, Ayan and Puri, Pranav and Kaushish, Anurag and Roy, Sampurna and Aich, Sumit and Choudhury, Tanupriya and Abraham, Ajith},
  title     = {{VLM} Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4361--4375},
}
AdGaze-3500: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads: Jianping Ye,

Michel Wedel; [pdf] [supp]
[bibtex]
% Braced words in title/booktitle keep their capitalization under sentence-casing styles.
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jianping and Wedel, Michel},
  title     = {{AdGaze-3500}: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4417--4427},
}