DataMFM: Emerging Directions in Data for Multimodal Foundation Models


TimeCausality: Evaluating the Causal Ability in Time Dimension for Vision Language Models
Zeqing Wang,
Shiyuan Zhang,
Chengpei Tang,
Keze Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zeqing and Zhang, Shiyuan and Tang, Chengpei and Wang, Keze},
  title     = {{TimeCausality}: Evaluating the Causal Ability in Time Dimension for Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4396--4406},
}

VLA-AD: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection
Asma Belhadi,
Youcef Djenouri,
Ahmed Nabil Belbachir
[pdf]
[bibtex]
@InProceedings{Belhadi_2026_CVPR,
  author    = {Belhadi, Asma and Djenouri, Youcef and Belbachir, Ahmed Nabil},
  title     = {{VLA-AD}: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331},
}

Scalable Parallel Prompting for Complex AV Video Captioning
April Yang,
Roberto Amoroso,
Nikita Durasov,
Devansh Bisla,
Sandipan Kundu,
Elmar Haussmann,
Ruchi Bhargava,
Maying Shen,
Nadine Chang,
Jose M. Alvarez
[pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, April and Amoroso, Roberto and Durasov, Nikita and Bisla, Devansh and Kundu, Sandipan and Haussmann, Elmar and Bhargava, Ruchi and Shen, Maying and Chang, Nadine and Alvarez, Jose M.},
  title     = {Scalable Parallel Prompting for Complex {AV} Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4407--4416},
}

Longitudinal Multimodal Modeling for Alzheimer's Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion
Zeqing Li,
Linlin Gao,
Liming Dong,
Hao Huang
[pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zeqing and Gao, Linlin and Dong, Liming and Huang, Hao},
  title     = {Longitudinal Multimodal Modeling for {Alzheimer's} Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4343--4350},
}

Adversarial Feedback from Segmentation Network to Siamese Diffusion for Improving Polyp Segmentation
Kairi Osaki,
Kazuhiro Hotta
[pdf]
[bibtex]
@InProceedings{Osaki_2026_CVPR,
  author    = {Osaki, Kairi and Hotta, Kazuhiro},
  title     = {Adversarial Feedback from Segmentation Network to {Siamese} Diffusion for Improving Polyp Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4351--4360},
}

Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation
Nisarg A Shah,
Amir Ziai,
Chaitanya Ekanadham,
Vishal M. Patel
[pdf]
[bibtex]
@InProceedings{A_Shah_2026_CVPR,
  author    = {Shah, Nisarg A and Ziai, Amir and Ekanadham, Chaitanya and Patel, Vishal M.},
  title     = {Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4376--4386},
}

Uncertainty-Guided Data Curation for 3D Object Detection
Nikita Durasov,
Rafid Mahmood,
Jiwoong Choi,
Marc T. Law,
James Lucas,
Pascal Fua,
Jose M. Alvarez
[pdf] [supp]
[bibtex]
@InProceedings{Durasov_2026_CVPR,
  author    = {Durasov, Nikita and Mahmood, Rafid and Choi, Jiwoong and Law, Marc T. and Lucas, James and Fua, Pascal and Alvarez, Jose M.},
  title     = {Uncertainty-Guided Data Curation for {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4332--4342},
}

Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis
Yian Wang,
Linlin Gao,
Zeqing Li
[pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yian and Gao, Linlin and Li, Zeqing},
  title     = {Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4387--4395},
}

VLM Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models
Ayan Sar,
Pranav Puri,
Anurag Kaushish,
Sampurna Roy,
Sumit Aich,
Tanupriya Choudhury,
Ajith Abraham
[pdf] [supp]
[bibtex]
@InProceedings{Sar_2026_CVPR,
  author    = {Sar, Ayan and Puri, Pranav and Kaushish, Anurag and Roy, Sampurna and Aich, Sumit and Choudhury, Tanupriya and Abraham, Ajith},
  title     = {{VLM} Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4361--4375},
}

AdGaze-3500: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads
Jianping Ye,
Michel Wedel
[pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jianping and Wedel, Michel},
  title     = {{AdGaze-3500}: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4417--4427},
}