WACV 2025 Open Access Repository

3rd Workshop on Large Language and Vision Models for Autonomous Driving

SenseRAG: Constructing Environmental Knowledge Bases with Proactive Querying for LLM-Based Autonomous Driving: Xuewen Luo,

Chenxi Liu,

Fan Ding,

Fengze Yang,

Yang Zhou,

Junnyong Loo,

Hwa Hui Tew; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2025_WACV,
  author    = {Luo, Xuewen and Liu, Chenxi and Ding, Fan and Yang, Fengze and Zhou, Yang and Loo, Junnyong and Tew, Hwa Hui},
  title     = {{SenseRAG}: Constructing Environmental Knowledge Bases with Proactive Querying for {LLM}-Based Autonomous Driving},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {989--996},
}
VLMine: Long-Tail Data Mining with Vision Language Models: Mao Ye,

Greg P. Meyer,

Zaiwei Zhang,

Dennis Park,

Siva Karthik Mustikovela,

Yuning Chai,

Eric Wolff; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2025_WACV,
  author    = {Ye, Mao and Meyer, Greg P. and Zhang, Zaiwei and Park, Dennis and Mustikovela, Siva Karthik and Chai, Yuning and Wolff, Eric},
  title     = {{VLMine}: Long-Tail Data Mining with Vision Language Models},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1072--1082},
}
Glimpse of MCQ based VQA in Road & Traffic Scenarios: Ambarish Parthasarathy,

Athira Krishnan R,

Sumukha BG; [pdf]
[bibtex]
@InProceedings{Parthasarathy_2025_WACV,
  author    = {Parthasarathy, Ambarish and R, Athira Krishnan and BG, Sumukha},
  title     = {Glimpse of {MCQ} based {VQA} in Road \& Traffic Scenarios},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {997--1000},
}
Position: Prospective of Autonomous Driving - Multimodal LLMs World Models Embodied Intelligence AI Alignment and Mamba: Yunsheng Ma,

Wenqian Ye,

Can Cui,

Haiming Zhang,

Shuo Xing,

Fucai Ke,

Jinhong Wang,

Chenglin Miao,

Jintai Chen,

Hamid Rezatofighi,

Zhen Li,

Guangtao Zheng,

Chao Zheng,

Tianjiao He,

Manmohan Chandraker,

Burhaneddin Yaman,

Xin Ye,

Hang Zhao,

Xu Cao; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_WACV,
  author    = {Ma, Yunsheng and Ye, Wenqian and Cui, Can and Zhang, Haiming and Xing, Shuo and Ke, Fucai and Wang, Jinhong and Miao, Chenglin and Chen, Jintai and Rezatofighi, Hamid and Li, Zhen and Zheng, Guangtao and Zheng, Chao and He, Tianjiao and Chandraker, Manmohan and Yaman, Burhaneddin and Ye, Xin and Zhao, Hang and Cao, Xu},
  title     = {Position: Prospective of Autonomous Driving - Multimodal {LLMs} World Models Embodied Intelligence {AI} Alignment and {Mamba}},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1010--1026},
}
Scenario Understanding of Traffic Scenes Through Large Visual Language Models: Esteban Rivera,

Jannik Lübberstedt,

Nico Uhlemann,

Markus Lienkamp; [pdf]
[bibtex]
@InProceedings{Rivera_2025_WACV,
  author    = {Rivera, Esteban and L{\"u}bberstedt, Jannik and Uhlemann, Nico and Lienkamp, Markus},
  title     = {Scenario Understanding of Traffic Scenes Through Large Visual Language Models},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1037--1045},
}
ScVLM: Enhancing Vision-Language Model for Safety-Critical Event Understanding: Liang Shi,

Boyu Jiang,

Tong Zeng,

Feng Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2025_WACV,
  author    = {Shi, Liang and Jiang, Boyu and Zeng, Tong and Guo, Feng},
  title     = {{ScVLM}: Enhancing Vision-Language Model for Safety-Critical Event Understanding},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1061--1071},
}
OpenEMMA: Open-Source Multimodal Model for End-to-End Autonomous Driving: Shuo Xing,

Chengyuan Qian,

Yuping Wang,

Hongyuan Hua,

Kexin Tian,

Yang Zhou,

Zhengzhong Tu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xing_2025_WACV,
  author    = {Xing, Shuo and Qian, Chengyuan and Wang, Yuping and Hua, Hongyuan and Tian, Kexin and Zhou, Yang and Tu, Zhengzhong},
  title     = {{OpenEMMA}: Open-Source Multimodal Model for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1001--1009},
}
Query3D: LLM-Powered Open-Vocabulary Scene Segmentation with Language Embedded 3D Gaussians: Amirhosein Chahe,

Lifeng Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Chahe_2025_WACV,
  author    = {Chahe, Amirhosein and Zhou, Lifeng},
  title     = {{Query3D}: {LLM}-Powered Open-Vocabulary Scene Segmentation with Language Embedded {3D} {Gaussians}},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1051--1060},
}
Enhancing Weakly-Supervised Object Detection on Static Images through (Hallucinated) Motion: Cagri Gungor,

Adriana Kovashka; [pdf] [arXiv]
[bibtex]
@InProceedings{Gungor_2025_WACV,
  author    = {Gungor, Cagri and Kovashka, Adriana},
  title     = {Enhancing Weakly-Supervised Object Detection on Static Images through (Hallucinated) Motion},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1046--1050},
}
Language-Driven Active Learning for Diverse Open-Set 3D Object Detection: Ross Greer,

Bjørk Antoniussen,

Andreas Møgelmose,

Mohan Trivedi; [pdf]
[bibtex]
@InProceedings{Greer_2025_WACV,
  author    = {Greer, Ross and Antoniussen, Bj{\o}rk and M{\o}gelmose, Andreas and Trivedi, Mohan},
  title     = {Language-Driven Active Learning for Diverse Open-Set {3D} Object Detection},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {980--988},
}
Evaluating Multimodal Vision-Language Model Prompting Strategies for Visual Question Answering in Road Scene Understanding: Aryan Keskar,

Srinivasa Perisetla,

Ross Greer; [pdf]
[bibtex]
@InProceedings{Keskar_2025_WACV,
  author    = {Keskar, Aryan and Perisetla, Srinivasa and Greer, Ross},
  title     = {Evaluating Multimodal Vision-Language Model Prompting Strategies for Visual Question Answering in Road Scene Understanding},
  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision ({WACV}) Workshops},
  month     = feb,
  year      = {2025},
  pages     = {1027--1036},
}