Expanding Horizons in AI Benchmarking: Multimodal Approaches
KOFFVQA: An Objectively Evaluated Free-form VQA Benchmark for Large Vision-Language Models in the Korean Language
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Yoonshik and Jung, Jaeyoon},
  title     = {{KOFFVQA}: An Objectively Evaluated Free-form {VQA} Benchmark for Large Vision-Language Models in the {Korean} Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {575--585},
}
Behind the Magic, MERLIM: Multi-modal Evaluation Benchmark for Large Image-Language Models
[pdf]
[supp]
[bibtex]@InProceedings{Villa_2025_CVPR,
  author    = {Villa, Andr{\'e}s and L{\'e}on, Juan and Soto, Alvaro and Ghanem, Bernard},
  title     = {Behind the Magic, {MERLIM}: Multi-modal Evaluation Benchmark for Large Image-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {492--502},
}
Beyond Raw Videos: Understanding Edited Videos with Large Multimodal Model
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Lu and Zhu, Sijie and Li, Chunyuan and Kuo, Chia-Wen and Chen, Fan and Wang, Xinyao and Chen, Guang and Du, Dawei and Yuan, Ye and Wen, Longyin},
  title     = {Beyond Raw Videos: Understanding Edited Videos with Large Multimodal Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {503--512},
}
Revisiting Multi-Modal LLM Evaluation
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jian and Srivastava, Shikhar and Chen, Junyu and Shrestha, Robik and Acharya, Manoj and Kafle, Kushal and Kanan, Christopher},
  title     = {Revisiting Multi-Modal {LLM} Evaluation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {555--564},
}
Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jierun and Wei, Fangyun and Zhao, Jinjing and Song, Sizhe and Wu, Bohuai and Peng, Zhuoxuan and Chan, S.-H. Gary and Zhang, Hongyang},
  title     = {Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {513--524},
}
Choosing 'Right' from Wrong: A Closer Look at Selection Bias in Spatial Multiple-Choice Questions in Large Multimodal Models
[pdf]
[supp]
[bibtex]@InProceedings{Zeno_2025_CVPR,
  author    = {Zeno, Giselle and Jedidi, Nour and Gomez, Steven},
  title     = {Choosing `Right' from Wrong: A Closer Look at Selection Bias in Spatial Multiple-Choice Questions in Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {535--544},
}
Quantum Federated Learning for Multimodal Data: A Modality-Agnostic Approach
[pdf]
[bibtex]@InProceedings{Pokharel_2025_CVPR,
  author    = {Pokharel, Atit and Rahman, Ratun and Morris, Thomas and Nguyen, Dinh C.},
  title     = {Quantum Federated Learning for Multimodal Data: A Modality-Agnostic Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {545--554},
}
MerCulture: A Comprehensive Benchmark to Evaluate Vision-Language Models on Cultural Understanding in Singapore
[pdf]
[bibtex]@InProceedings{Tushar_2025_CVPR,
  author    = {Tushar, Pranav and Pandey, Eshan and Austria, Lyka Diane Bala and Loo, Yin Yin and Lim, Jing Hao and Atmosukarto, Indriyati and Lock, Donny Soh Cheng},
  title     = {{MerCulture}: A Comprehensive Benchmark to Evaluate Vision-Language Models on Cultural Understanding in {Singapore}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {565--574},
}
TextInVision: Text and Prompt Complexity Driven Visual Text Generation Benchmark
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fallah_2025_CVPR,
  author    = {Fallah, Forouzan and Patel, Maitreya and Chatterjee, Agneet and Morariu, Vlad and Baral, Chitta and Yang, Yezhou},
  title     = {{TextInVision}: Text and Prompt Complexity Driven Visual Text Generation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {525--534},
}