DataMFM: Emerging Directions in Data for Multimodal Foundation Models
TimeCausality: Evaluating the Causal Ability in Time Dimension for Vision Language Models
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zeqing and Zhang, Shiyuan and Tang, Chengpei and Wang, Keze},
  title     = {{TimeCausality}: Evaluating the Causal Ability in Time Dimension for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4396--4406},
}
VLA-AD: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection
[pdf]
[bibtex]@InProceedings{Belhadi_2026_CVPR,
  author    = {Belhadi, Asma and Djenouri, Youcef and Belbachir, Ahmed Nabil},
  title     = {{VLA-AD}: Agentic Vision-Language Foundation Models for Context-Aware Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331},
}
Scalable Parallel Prompting for Complex AV Video Captioning
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, April and Amoroso, Roberto and Durasov, Nikita and Bisla, Devansh and Kundu, Sandipan and Haussmann, Elmar and Bhargava, Ruchi and Shen, Maying and Chang, Nadine and Alvarez, Jose M.},
  title     = {Scalable Parallel Prompting for Complex {AV} Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4407--4416},
}
Longitudinal Multimodal Modeling for Alzheimer's Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Zeqing and Gao, Linlin and Dong, Liming and Huang, Hao},
  title     = {Longitudinal Multimodal Modeling for {Alzheimer's} Disease with Pre-trained Brain Latent Diffusion and Mixture-of-Experts Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4343--4350},
}
Adversarial Feedback from Segmentation Network to Siamese Diffusion for Improving Polyp Segmentation
[pdf]
[bibtex]@InProceedings{Osaki_2026_CVPR,
  author    = {Osaki, Kairi and Hotta, Kazuhiro},
  title     = {Adversarial Feedback from Segmentation Network to {Siamese} Diffusion for Improving Polyp Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4351--4360},
}
Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation
[pdf]
[bibtex]@InProceedings{A_Shah_2026_CVPR,
  author    = {Shah, Nisarg A. and Ziai, Amir and Ekanadham, Chaitanya and Patel, Vishal M.},
  title     = {Cineaste: A Fine-grained Contextual Movie Question Answering Benchmark with Automated Data Curation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4376--4386},
}
Uncertainty-Guided Data Curation for 3D Object Detection
[pdf]
[supp]
[bibtex]@InProceedings{Durasov_2026_CVPR,
  author    = {Durasov, Nikita and Mahmood, Rafid and Choi, Jiwoong and Law, Marc T. and Lucas, James and Fua, Pascal and Alvarez, Jose M.},
  title     = {Uncertainty-Guided Data Curation for {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4332--4342},
}
Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR_b,
  author    = {Wang, Yian and Gao, Linlin and Li, Zeqing},
  title     = {Learning Multimodal Priors with Shared Vector Quantization for Incomplete Multimodal Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4387--4395},
}
VLM Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models
[pdf]
[supp]
[bibtex]@InProceedings{Sar_2026_CVPR,
  author    = {Sar, Ayan and Puri, Pranav and Kaushish, Anurag and Roy, Sampurna and Aich, Sumit and Choudhury, Tanupriya and Abraham, Ajith},
  title     = {{VLM} Reality Check: A Causal Counterfactual Benchmark for Diagnosing Cognitive Biases in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4361--4375},
}
AdGaze-3500: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jianping and Wedel, Michel},
  title     = {{AdGaze-3500}: Evaluating Large Multimodal Models' Ability to Predict Human Attention to Ads},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4417--4427},
}

