CVPR 2025 Open Access Repository

Expanding Horizons in AI Benchmarking: Multimodal Approaches

KOFFVQA: An Objectively Evaluated Free-form VQA Benchmark for Large Vision-Language Models in the Korean Language: Yoonshik Kim,

Jaeyoon Jung; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Yoonshik and Jung, Jaeyoon},
  title     = {{KOFFVQA}: An Objectively Evaluated Free-form {VQA} Benchmark for Large Vision-Language Models in the {Korean} Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {575--585},
}
Behind the Magic, MERLIM: Multi-modal Evaluation Benchmark for Large Image-Language Models: Andrés Villa,

Juan Léon,

Alvaro Soto,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Villa_2025_CVPR,
  author    = {Villa, Andr{\'e}s and L{\'e}on, Juan and Soto, Alvaro and Ghanem, Bernard},
  title     = {Behind the Magic, {MERLIM}: Multi-modal Evaluation Benchmark for Large Image-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {492--502},
}
Beyond Raw Videos: Understanding Edited Videos with Large Multimodal Model: Lu Xu,

Sijie Zhu,

Chunyuan Li,

Chia-Wen Kuo,

Fan Chen,

Xinyao Wang,

Guang Chen,

Dawei Du,

Ye Yuan,

Longyin Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Lu and Zhu, Sijie and Li, Chunyuan and Kuo, Chia-Wen and Chen, Fan and Wang, Xinyao and Chen, Guang and Du, Dawei and Yuan, Ye and Wen, Longyin},
  title     = {Beyond Raw Videos: Understanding Edited Videos with Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {503--512},
}
Revisiting Multi-Modal LLM Evaluation: Jian Lu,

Shikhar Srivastava,

Junyu Chen,

Robik Shrestha,

Manoj Acharya,

Kushal Kafle,

Christopher Kanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jian and Srivastava, Shikhar and Chen, Junyu and Shrestha, Robik and Acharya, Manoj and Kafle, Kushal and Kanan, Christopher},
  title     = {Revisiting Multi-Modal {LLM} Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {555--564},
}
Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models: Jierun Chen,

Fangyun Wei,

Jinjing Zhao,

Sizhe Song,

Bohuai Wu,

Zhuoxuan Peng,

S.-H. Gary Chan,

Hongyang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jierun and Wei, Fangyun and Zhao, Jinjing and Song, Sizhe and Wu, Bohuai and Peng, Zhuoxuan and Chan, S.-H. Gary and Zhang, Hongyang},
  title     = {Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {513--524},
}
Choosing `Right' from Wrong: A Closer Look at Selection Bias in Spatial Multiple-Choice Questions in Large Multimodal Models: Giselle Zeno,

Nour Jedidi,

Steven Gomez; [pdf] [supp]
[bibtex]
@InProceedings{Zeno_2025_CVPR,
  author    = {Zeno, Giselle and Jedidi, Nour and Gomez, Steven},
  title     = {Choosing `Right' from Wrong: A Closer Look at Selection Bias in Spatial Multiple-Choice Questions in Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {535--544},
}
Quantum Federated Learning for Multimodal Data: A Modality-Agnostic Approach: Atit Pokharel,

Ratun Rahman,

Thomas Morris,

Dinh C. Nguyen; [pdf] [arXiv]
[bibtex]
@InProceedings{Pokharel_2025_CVPR,
  author    = {Pokharel, Atit and Rahman, Ratun and Morris, Thomas and Nguyen, Dinh C.},
  title     = {Quantum Federated Learning for Multimodal Data: A Modality-Agnostic Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {545--554},
}
MerCulture: A Comprehensive Benchmark to Evaluate Vision-Language Models on Cultural Understanding in Singapore: Pranav Tushar,

Eshan Pandey,

Lyka Diane Bala Austria,

Yin Yin Loo,

Jing Hao Lim,

Indriyati Atmosukarto,

Donny Soh Cheng Lock; [pdf]
[bibtex]
@InProceedings{Tushar_2025_CVPR,
  author    = {Tushar, Pranav and Pandey, Eshan and Austria, Lyka Diane Bala and Loo, Yin Yin and Lim, Jing Hao and Atmosukarto, Indriyati and Lock, Donny Soh Cheng},
  title     = {{MerCulture}: A Comprehensive Benchmark to Evaluate Vision-Language Models on Cultural Understanding in {Singapore}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {565--574},
}
TextInVision: Text and Prompt Complexity Driven Visual Text Generation Benchmark: Forouzan Fallah,

Maitreya Patel,

Agneet Chatterjee,

Vlad Morariu,

Chitta Baral,

Yezhou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fallah_2025_CVPR,
  author    = {Fallah, Forouzan and Patel, Maitreya and Chatterjee, Agneet and Morariu, Vlad and Baral, Chitta and Yang, Yezhou},
  title     = {{TextInVision}: Text and Prompt Complexity Driven Visual Text Generation Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {525--534},
}