Navigating the Future: Ensuring Trustworthiness in Multi-Modal Open-World Intelligence


Benchmarking Multi-modal Semantic Segmentation under Sensor Failures: Missing and Noisy Modality Robustness
Chenfei Liao,
Kaiyu Lei,
Xu Zheng,
Junha Moon,
Zhixiong Wang,
Yixuan Wang,
Danda Pani Paudel,
Luc Van Gool,
Xuming Hu
[pdf] [arXiv]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; the venue acronym is braced so styles cannot recase it.
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Chenfei and Lei, Kaiyu and Zheng, Xu and Moon, Junha and Wang, Zhixiong and Wang, Yixuan and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming},
  title     = {Benchmarking Multi-modal Semantic Segmentation under Sensor Failures: Missing and Noisy Modality Robustness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1576--1586},
}

IBD: Alleviating Hallucinations in Large Vision-Language Models via Image-Biased Decoding
Lanyun Zhu,
Deyi Ji,
Tianrun Chen,
Peng Xu,
Jieping Ye,
Jun Liu
[pdf] [arXiv]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; acronyms are braced so styles cannot recase them.
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Ji, Deyi and Chen, Tianrun and Xu, Peng and Ye, Jieping and Liu, Jun},
  title     = {{IBD}: Alleviating Hallucinations in Large Vision-Language Models via Image-Biased Decoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1624--1633},
}

Vision Language Models for Massive MIMO Semantic Communication
Stephen D. Liang
[pdf]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; acronyms are braced so styles cannot recase them.
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Stephen D.},
  title     = {Vision Language Models for Massive {MIMO} Semantic Communication},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1669--1679},
}

On the Robustness of GUI Grounding Models Against Image Attacks
Haoren Zhao,
Tianyi Chen,
Zhen Wang
[pdf] [arXiv]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; acronyms are braced so styles cannot recase them.
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Haoren and Chen, Tianyi and Wang, Zhen},
  title     = {On the Robustness of {GUI} Grounding Models Against Image Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1618--1623},
}

Attention-Guided Hierarchical Defense for Multimodal Attacks in Vision-Language Models
Long Chen,
Yuling Chen,
Yun Luo,
Hui Dou,
Xinyang Zhong
[pdf]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; the venue acronym is braced so styles cannot recase it.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Long and Chen, Yuling and Luo, Yun and Dou, Hui and Zhong, Xinyang},
  title     = {Attention-Guided Hierarchical Defense for Multimodal Attacks in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1607--1617},
}

A Survey of State of the Art Large Vision Language Models: Benchmark Evaluations and Challenges
Zongxia Li,
Xiyang Wu,
Hongyang Du,
Fuxiao Liu,
Huy Nghiem,
Guangyao Shi
[pdf]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; the venue acronym is braced so styles cannot recase it.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zongxia and Wu, Xiyang and Du, Hongyang and Liu, Fuxiao and Nghiem, Huy and Shi, Guangyao},
  title     = {A Survey of State of the Art Large Vision Language Models: Benchmark Evaluations and Challenges},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1587--1606},
}

Machine Unlearning in Hyperbolic vs. Euclidean Multimodal Contrastive Learning: Adapting Alignment Calibration to MERU
Àlex Pujol Vidal,
Kamal Nasrollahi,
Thomas B. Moeslund,
Sergio Escalera
[pdf] [supp] [arXiv]
[bibtex]
% Workshop paper entry. The accented initial is written as a BibTeX special character ({\`A}) so sorting and labels treat it as one letter.
% NOTE(review): the first author's surname may be the compound "Pujol Vidal" -- confirm before changing the name split.
@InProceedings{Vidal_2025_CVPR,
  author    = {Vidal, {\`A}lex Pujol and Nasrollahi, Kamal and Moeslund, Thomas B. and Escalera, Sergio},
  title     = {Machine Unlearning in Hyperbolic vs. {Euclidean} Multimodal Contrastive Learning: Adapting Alignment Calibration to {MERU}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1644--1653},
}

Multimodal Generalized Category Discovery
Yuchang Su,
Renping Zhou,
Siyu Huang,
Xingjian Li,
Tianyang Wang,
Ziyue Wang,
Min Xu
[pdf] [supp] [arXiv]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; the venue acronym is braced so styles cannot recase it.
@InProceedings{Su_2025_CVPR,
  author    = {Su, Yuchang and Zhou, Renping and Huang, Siyu and Li, Xingjian and Wang, Tianyang and Wang, Ziyue and Xu, Min},
  title     = {Multimodal Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1634--1643},
}

Prompt the Missing: Prompt-Based Robust Audio-Visual Classification under Uncertain Modalities
Eunju Park
[pdf]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; the venue acronym is braced so styles cannot recase it.
@InProceedings{Park_2025_CVPR,
  author    = {Park, Eunju},
  title     = {Prompt the Missing: Prompt-Based Robust Audio-Visual Classification under Uncertain Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1654--1662},
}

HARMONY: Hidden Activation Representations and Model Output-Aware Uncertainty Estimation for Vision-Language Models
Erum Mushtaq,
Zalan Fabian,
Yavuz Faruk Bakman,
Anil Ramakrishna,
Mahdi Soltanolkotabi,
Salman Avestimehr
[pdf]
[bibtex]
% Workshop paper entry. `jun` is the standard month macro; acronyms are braced so styles cannot recase them.
@InProceedings{Mushtaq_2025_CVPR,
  author    = {Mushtaq, Erum and Fabian, Zalan and Bakman, Yavuz Faruk and Ramakrishna, Anil and Soltanolkotabi, Mahdi and Avestimehr, Salman},
  title     = {{HARMONY}: Hidden Activation Representations and Model Output-Aware Uncertainty Estimation for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1663--1668},
}