Workshop on Distillation of Foundation Models for Autonomous Driving


ReasonDrive: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models
Amirhosein Chahe,
Lifeng Zhou
[pdf] [arXiv]
[bibtex]
@InProceedings{Chahe_2025_CVPR,
  author    = {Chahe, Amirhosein and Zhou, Lifeng},
  title     = {{ReasonDrive}: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3870--3879},
}

Clip4Retrofit: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture CLIP Distillation
Li Zhong,
Ahmed Ghazal,
Jun-Jun Wan,
Frederik Zilly,
Patrick Mackens,
Joachim Vollrath,
Bogdan Coseriu
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Li and Ghazal, Ahmed and Wan, Jun-Jun and Zilly, Frederik and Mackens, Patrick and Vollrath, Joachim and Coseriu, Bogdan},
  title     = {{Clip4Retrofit}: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture {CLIP} Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3829--3837},
}

ReferGPT: Towards Zero-Shot Referring Multi-Object Tracking
Tzoulio Chamiti,
Leandro Di Bella,
Adrian Munteanu,
Nikos Deligiannis
[pdf] [arXiv]
[bibtex]
@InProceedings{Chamiti_2025_CVPR,
  author    = {Chamiti, Tzoulio and Di Bella, Leandro and Munteanu, Adrian and Deligiannis, Nikos},
  title     = {{ReferGPT}: Towards Zero-Shot Referring Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3849--3858},
}

Robusto-1 Dataset: Comparing Humans and VLMs on real out-of-distribution Autonomous Driving VQA from Peru
Dunant Cusipuma,
David Ortega,
Victor Flores-Benites,
Arturo Deza
[pdf] [supp]
[bibtex]
@InProceedings{Cusipuma_2025_CVPR,
  author    = {Cusipuma, Dunant and Ortega, David and Flores-Benites, Victor and Deza, Arturo},
  title     = {Robusto-1 Dataset: Comparing Humans and {VLMs} on real out-of-distribution Autonomous Driving {VQA} from {Peru}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3817--3828},
}

Harmonizing Attention Fields with Knowledge Distillation for Multi-View 3D Object Detection
Yafei Qi,
Menghao Yang,
Fan Wu,
Chen Wang,
Yongmin Zhang
[pdf]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Yafei and Yang, Menghao and Wu, Fan and Wang, Chen and Zhang, Yongmin},
  title     = {Harmonizing Attention Fields with Knowledge Distillation for Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3759--3767},
}

Fairness-Aware Boosting Model for Imbalanced 3D Point Cloud Segmentation in Autonomous Driving
Elahe Yahyapour,
Chengbo Ai
[pdf] [supp]
[bibtex]
@InProceedings{Yahyapour_2025_CVPR,
  author    = {Yahyapour, Elahe and Ai, Chengbo},
  title     = {Fairness-Aware Boosting Model for Imbalanced {3D} Point Cloud Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3808--3816},
}

CleanMAP: Distilling Multimodal LLMs for Confidence-Driven Crowdsourced HD Map Updates
Ankit Kumar Shaw,
Kun Jiang,
Tuopu Wen,
Chandan Kumar Sah,
Yining Shi,
Mengmeng Yang,
Diange Yang,
Xiaoli Lian
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaw_2025_CVPR,
  author    = {Shaw, Ankit Kumar and Jiang, Kun and Wen, Tuopu and Sah, Chandan Kumar and Shi, Yining and Yang, Mengmeng and Yang, Diange and Lian, Xiaoli},
  title     = {{CleanMAP}: Distilling Multimodal {LLMs} for Confidence-Driven Crowdsourced {HD} Map Updates},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3798--3807},
}

ZFusion: An Effective Fuser of Camera and 4D Radar for 3D Object Perception in Autonomous Driving
Sheng Yang,
Tong Zhan,
Shichen Qiao,
Jicheng Gong,
Qing Yang,
Jian Wang,
Yanfeng Lu
[pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Sheng and Zhan, Tong and Qiao, Shichen and Gong, Jicheng and Yang, Qing and Wang, Jian and Lu, Yanfeng},
  title     = {{ZFusion}: An Effective Fuser of Camera and {4D} Radar for {3D} Object Perception in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3768--3777},
}

Talk2Traffic: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model
Zihao Sheng,
Zilin Huang,
Yansong Qu,
Yue Leng,
Sikai Chen
[pdf]
[bibtex]
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Zihao and Huang, Zilin and Qu, Yansong and Leng, Yue and Chen, Sikai},
  title     = {{Talk2Traffic}: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3788--3797},
}

RAD: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving
Yujin Wang,
Quanfeng Liu,
Zhengxin Jiang,
Tianyi Wang,
Junfeng Jiao,
Hongqing Chu,
Bingzhao Gao,
Hong Chen
[pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yujin and Liu, Quanfeng and Jiang, Zhengxin and Wang, Tianyi and Jiao, Junfeng and Chu, Hongqing and Gao, Bingzhao and Chen, Hong},
  title     = {{RAD}: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3838--3848},
}

PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector
Kaidong Li,
Tianxiao Zhang,
Kuan-Chuan Peng,
Guanghui Wang
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
  title     = {{PF3Det}: A Prompted Foundation Feature Assisted Visual {LiDAR} {3D} Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3778--3787},
}

Drive4C: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving
Tin Stribor Sohn,
Maximilian Dillitzer,
Johannes Bach,
Jason J. Corso,
Tim Brühl,
Robin Schwager,
Tim Dieter Eberhardt,
Eric Sax
[pdf]
[bibtex]
@InProceedings{Sohn_2025_CVPR,
  author    = {Sohn, Tin Stribor and Dillitzer, Maximilian and Bach, Johannes and Corso, Jason J. and Br{\"u}hl, Tim and Schwager, Robin and Eberhardt, Tim Dieter and Sax, Eric},
  title     = {{Drive4C}: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3859--3869},
}