Workshop on Distillation of Foundation Models for Autonomous Driving
ReasonDrive: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models
[pdf]
[arXiv]
[bibtex]
@comment{Chahe and Zhou, ReasonDrive (CVPR 2025 Workshops). Case-sensitive title words are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Chahe_2025_CVPR,
  author    = {Chahe, Amirhosein and Zhou, Lifeng},
  title     = {{ReasonDrive}: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3870--3879},
}
Clip4Retrofit: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture CLIP Distillation
[pdf]
[arXiv]
[bibtex]
@comment{Zhong et al., Clip4Retrofit (CVPR 2025 Workshops). Case-sensitive title words are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Li and Ghazal, Ahmed and Wan, Jun-Jun and Zilly, Frederik and Mackens, Patrick and Vollrath, Joachim and Coseriu, Bogdan},
  title     = {{Clip4Retrofit}: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture {CLIP} Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3829--3837},
}
ReferGPT: Towards Zero-Shot Referring Multi-Object Tracking
[pdf]
[arXiv]
[bibtex]
@comment{Chamiti et al., ReferGPT (CVPR 2025 Workshops). Case-sensitive title words are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Chamiti_2025_CVPR,
  author    = {Chamiti, Tzoulio and Di Bella, Leandro and Munteanu, Adrian and Deligiannis, Nikos},
  title     = {{ReferGPT}: Towards Zero-Shot Referring Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3849--3858},
}
Robusto-1 Dataset: Comparing Humans and VLMs on real out-of-distribution Autonomous Driving VQA from Peru
[pdf]
[supp]
[bibtex]
@comment{Cusipuma et al., Robusto-1 Dataset (CVPR 2025 Workshops). Acronyms and proper nouns in the title are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Cusipuma_2025_CVPR,
  author    = {Cusipuma, Dunant and Ortega, David and Flores-Benites, Victor and Deza, Arturo},
  title     = {{Robusto-1} Dataset: Comparing Humans and {VLMs} on real out-of-distribution Autonomous Driving {VQA} from {Peru}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3817--3828},
}
Harmonizing Attention Fields with Knowledge Distillation for Multi-View 3D Object Detection
[pdf]
[bibtex]
@comment{Qi et al., Harmonizing Attention Fields (CVPR 2025 Workshops). The token 3D is brace-protected so sentence-casing styles do not lowercase it; month uses the standard macro.}
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Yafei and Yang, Menghao and Wu, Fan and Wang, Chen and Zhang, Yongmin},
  title     = {Harmonizing Attention Fields with Knowledge Distillation for Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3759--3767},
}
Fairness-Aware Boosting Model for Imbalanced 3D Point Cloud Segmentation in Autonomous Driving
[pdf]
[supp]
[bibtex]
@comment{Yahyapour and Ai, Fairness-Aware Boosting Model (CVPR 2025 Workshops). The token 3D is brace-protected so sentence-casing styles do not lowercase it; month uses the standard macro.}
@InProceedings{Yahyapour_2025_CVPR,
  author    = {Yahyapour, Elahe and Ai, Chengbo},
  title     = {Fairness-Aware Boosting Model for Imbalanced {3D} Point Cloud Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3808--3816},
}
CleanMAP: Distilling Multimodal LLMs for Confidence-Driven Crowdsourced HD Map Updates
[pdf]
[supp]
[arXiv]
[bibtex]
@comment{Shaw et al., CleanMAP (CVPR 2025 Workshops). Acronyms and the system name in the title are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Shaw_2025_CVPR,
  author    = {Shaw, Ankit Kumar and Jiang, Kun and Wen, Tuopu and Sah, Chandan Kumar and Shi, Yining and Yang, Mengmeng and Yang, Diange and Lian, Xiaoli},
  title     = {{CleanMAP}: Distilling Multimodal {LLMs} for Confidence-Driven Crowdsourced {HD} Map Updates},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3798--3807},
}
ZFusion: An Effective Fuser of Camera and 4D Radar for 3D Object Perception in Autonomous Driving
[pdf]
[arXiv]
[bibtex]
@comment{Yang et al., ZFusion (CVPR 2025 Workshops). The system name and the tokens 4D and 3D are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Sheng and Zhan, Tong and Qiao, Shichen and Gong, Jicheng and Yang, Qing and Wang, Jian and Lu, Yanfeng},
  title     = {{ZFusion}: An Effective Fuser of Camera and {4D} Radar for {3D} Object Perception in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3768--3777},
}
Talk2Traffic: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model
[pdf]
[bibtex]
@comment{Sheng et al., Talk2Traffic (CVPR 2025 Workshops). The system name in the title is brace-protected so sentence-casing styles keep it; month uses the standard macro.}
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Zihao and Huang, Zilin and Qu, Yansong and Leng, Yue and Chen, Sikai},
  title     = {{Talk2Traffic}: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3788--3797},
}
RAD: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving
[pdf]
[arXiv]
[bibtex]
@comment{Wang et al., RAD (CVPR 2025 Workshops). The acronym in the title is brace-protected so sentence-casing styles keep it; month uses the standard macro.}
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yujin and Liu, Quanfeng and Jiang, Zhengxin and Wang, Tianyi and Jiao, Junfeng and Chu, Hongqing and Gao, Bingzhao and Chen, Hong},
  title     = {{RAD}: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3838--3848},
}
PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector
[pdf]
[arXiv]
[bibtex]
@comment{Li et al., PF3Det (CVPR 2025 Workshops). The system name, LiDAR and 3D are brace-protected so sentence-casing styles keep them; month uses the standard macro.}
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
  title     = {{PF3Det}: A Prompted Foundation Feature Assisted Visual {LiDAR} {3D} Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3778--3787},
}
Drive4C: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving
[pdf]
[bibtex]
@comment{Sohn et al., Drive4C (CVPR 2025 Workshops). The umlaut in one author surname is written as a braced BibTeX special character so it sorts and labels as a single letter; the benchmark name in the title is brace-protected; month uses the standard macro.}
@InProceedings{Sohn_2025_CVPR,
  author    = {Sohn, Tin Stribor and Dillitzer, Maximilian and Bach, Johannes and Corso, Jason J. and Br{\"u}hl, Tim and Schwager, Robin and Eberhardt, Tim Dieter and Sax, Eric},
  title     = {{Drive4C}: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3859--3869},
}