CVPR 2025 Open Access Repository

Navigating the Future: Ensuring Trustworthiness in Multi-Modal Open-World Intelligence

Benchmarking Multi-modal Semantic Segmentation under Sensor Failures: Missing and Noisy Modality Robustness: Chenfei Liao,

Kaiyu Lei,

Xu Zheng,

Junha Moon,

Zhixiong Wang,

Yixuan Wang,

Danda Pani Paudel,

Luc Van Gool,

Xuming Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Chenfei and Lei, Kaiyu and Zheng, Xu and Moon, Junha and Wang, Zhixiong and Wang, Yixuan and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming},
  title     = {Benchmarking Multi-modal Semantic Segmentation under Sensor Failures: Missing and Noisy Modality Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1576--1586},
}
IBD: Alleviating Hallucinations in Large Vision-Language Models via Image-Biased Decoding: Lanyun Zhu,

Deyi Ji,

Tianrun Chen,

Peng Xu,

Jieping Ye,

Jun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Ji, Deyi and Chen, Tianrun and Xu, Peng and Ye, Jieping and Liu, Jun},
  title     = {{IBD}: Alleviating Hallucinations in Large Vision-Language Models via Image-Biased Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1624--1633},
}
Vision Language Models for Massive MIMO Semantic Communication: Stephen D. Liang; [pdf]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Stephen D.},
  title     = {Vision Language Models for Massive {MIMO} Semantic Communication},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1675--1685},
}
On the Robustness of GUI Grounding Models Against Image Attacks: Haoren Zhao,

Tianyi Chen,

Zhen Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Haoren and Chen, Tianyi and Wang, Zhen},
  title     = {On the Robustness of {GUI} Grounding Models Against Image Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1618--1623},
}
Attention-Guided Hierarchical Defense for Multimodal Attacks in Vision-Language Models: Long Chen,

Yuling Chen,

Yun Luo,

Hui Dou,

Xinyang Zhong; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Long and Chen, Yuling and Luo, Yun and Dou, Hui and Zhong, Xinyang},
  title     = {Attention-Guided Hierarchical Defense for Multimodal Attacks in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1607--1617},
}
A Survey of State of the Art Large Vision Language Models: Benchmark Evaluations and Challenges: Zongxia Li,

Xiyang Wu,

Hongyang Du,

Fuxiao Liu,

Huy Nghiem,

Guangyao Shi; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zongxia and Wu, Xiyang and Du, Hongyang and Liu, Fuxiao and Nghiem, Huy and Shi, Guangyao},
  title     = {A Survey of State of the Art Large Vision Language Models: Benchmark Evaluations and Challenges},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1587--1606},
}
Machine Unlearning in Hyperbolic vs. Euclidean Multimodal Contrastive Learning: Adapting Alignment Calibration to MERU: Àlex Pujol Vidal,

Kamal Nasrollahi,

Thomas B. Moeslund,

Sergio Escalera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vidal_2025_CVPR,
  author    = {Vidal, {\`A}lex Pujol and Nasrollahi, Kamal and Moeslund, Thomas B. and Escalera, Sergio},
  title     = {Machine Unlearning in Hyperbolic vs. {Euclidean} Multimodal Contrastive Learning: Adapting Alignment Calibration to {MERU}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1644--1653},
}
Multimodal Generalized Category Discovery: Yuchang Su,

Renping Zhou,

Siyu Huang,

Xingjian Li,

Tianyang Wang,

Ziyue Wang,

Min Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Yuchang and Zhou, Renping and Huang, Siyu and Li, Xingjian and Wang, Tianyang and Wang, Ziyue and Xu, Min},
  title     = {Multimodal Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1634--1643},
}
Prompt the Missing: Prompt-Based Robust Audio-Visual Classification under Uncertain Modalities: Eunju Park; [pdf]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Eunju},
  title     = {Prompt the Missing: Prompt-Based Robust Audio-Visual Classification under Uncertain Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1654--1662},
}
HARMONY: Hidden Activation Representations and Model Output-Aware Uncertainty Estimation for Vision-Language Models: Erum Mushtaq,

Zalan Fabian,

Yavuz Faruk Bakman,

Anil Ramakrishna,

Mahdi Soltanolkotabi,

Salman Avestimehr; [pdf] [arXiv]
[bibtex]
@InProceedings{Mushtaq_2025_CVPR,
  author    = {Mushtaq, Erum and Fabian, Zalan and Bakman, Yavuz Faruk and Ramakrishna, Anil and Soltanolkotabi, Mahdi and Avestimehr, Salman},
  title     = {{HARMONY}: Hidden Activation Representations and Model Output-Aware Uncertainty Estimation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {1669--1674},
}