CVPR 2025 Open Access Repository

Workshop on Distillation of Foundation Models for Autonomous Driving

Drive4C: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving: Tin Stribor Sohn,

Maximilian Dillitzer,

Johannes Bach,

Jason J. Corso,

Tim Brühl,

Robin Schwager,

Tim Dieter Eberhardt,

Eric Sax; [pdf]
[bibtex]
% CVPR 2025 Workshops paper. The coined name in the title is braced so sentence-casing styles keep its capitals; the umlaut accent sits in its own brace group so it sorts as one letter.
@InProceedings{Sohn_2025_CVPR,
  author    = {Sohn, Tin Stribor and Dillitzer, Maximilian and Bach, Johannes and Corso, Jason J. and Br{\"u}hl, Tim and Schwager, Robin and Eberhardt, Tim Dieter and Sax, Eric},
  title     = {{Drive4C}: A Closed-Loop Benchmark on What Foundation Models Really Need to Be Capable of for Language-Guided Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3898--3908},
}
Robusto-1 Dataset: Comparing Humans and VLMs on real out-of-distribution Autonomous Driving VQA from Peru: Dunant Cusipuma,

David Ortega,

Victor Flores-Benites,

Arturo Deza; [pdf] [supp]
[bibtex]
% CVPR 2025 Workshops paper. Acronyms and proper nouns in the title are braced so sentence-casing styles keep their capitals.
@InProceedings{Cusipuma_2025_CVPR,
  author    = {Cusipuma, Dunant and Ortega, David and Flores-Benites, Victor and Deza, Arturo},
  title     = {{Robusto-1} Dataset: Comparing Humans and {VLMs} on real out-of-distribution Autonomous Driving {VQA} from {Peru}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3856--3867},
}
RAD: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving: Yujin Wang,

Quanfeng Liu,

Zhengxin Jiang,

Tianyi Wang,

Junfeng Jiao,

Hongqing Chu,

Bingzhao Gao,

Hong Chen; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The acronym in the title is braced so sentence-casing styles keep its capitals.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yujin and Liu, Quanfeng and Jiang, Zhengxin and Wang, Tianyi and Jiao, Junfeng and Chu, Hongqing and Gao, Bingzhao and Chen, Hong},
  title     = {{RAD}: Retrieval-Augmented Decision-Making of Meta-Actions with Vision-Language Models in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3877--3887},
}
Fairness-Aware Boosting Model for Imbalanced 3D Point Cloud Segmentation in Autonomous Driving: Elahe Yahyapour,

Chengbo Ai; [pdf] [supp]
[bibtex]
% CVPR 2025 Workshops paper. "3D" in the title is braced so sentence-casing styles do not lowercase it.
@InProceedings{Yahyapour_2025_CVPR,
  author    = {Yahyapour, Elahe and Ai, Chengbo},
  title     = {Fairness-Aware Boosting Model for Imbalanced {3D} Point Cloud Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3847--3855},
}
CleanMAP: Distilling Multimodal LLMs for Confidence-Driven Crowdsourced HD Map Updates: Ankit Kumar Shaw,

Kun Jiang,

Tuopu Wen,

Chandan Kumar Sah,

Yining Shi,

Mengmeng Yang,

Diange Yang,

Xiaoli Lian; [pdf] [supp] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name and acronyms in the title are braced so sentence-casing styles keep their capitals.
@InProceedings{Shaw_2025_CVPR,
  author    = {Shaw, Ankit Kumar and Jiang, Kun and Wen, Tuopu and Sah, Chandan Kumar and Shi, Yining and Yang, Mengmeng and Yang, Diange and Lian, Xiaoli},
  title     = {{CleanMAP}: Distilling Multimodal {LLMs} for Confidence-Driven Crowdsourced {HD} Map Updates},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3837--3846},
}
Talk2Traffic: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model: Zihao Sheng,

Zilin Huang,

Yansong Qu,

Yue Leng,

Sikai Chen; [pdf]
[bibtex]
% CVPR 2025 Workshops paper. The coined name in the title is braced so sentence-casing styles keep its internal capital.
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Zihao and Huang, Zilin and Qu, Yansong and Leng, Yue and Chen, Sikai},
  title     = {{Talk2Traffic}: Interactive and Editable Traffic Scenario Generation for Autonomous Driving with Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3827--3836},
}
Harmonizing Attention Fields with Knowledge Distillation for Multi-View 3D Object Detection: Yafei Qi,

Menghao Yang,

Fan Wu,

Chen Wang,

Yongmin Zhang; [pdf]
[bibtex]
% CVPR 2025 Workshops paper. "3D" in the title is braced so sentence-casing styles do not lowercase it.
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Yafei and Yang, Menghao and Wu, Fan and Wang, Chen and Zhang, Yongmin},
  title     = {Harmonizing Attention Fields with Knowledge Distillation for Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3798--3806},
}
PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector: Kaidong Li,

Tianxiao Zhang,

Kuan-Chuan Peng,

Guanghui Wang; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name and mixed-case technical terms in the title are braced so sentence-casing styles keep their capitals.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
  title     = {{PF3Det}: A Prompted Foundation Feature Assisted Visual {LiDAR} {3D} Detector},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3817--3826},
}
Clip4Retrofit: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture CLIP Distillation: Li Zhong,

Ahmed Ghazal,

Jun-Jun Wan,

Frederik Zilly,

Patrick Mackens,

Joachim Vollrath,

Bogdan Coseriu; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name and the acronym in the title are braced so sentence-casing styles keep their capitals.
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Li and Ghazal, Ahmed and Wan, Jun-Jun and Zilly, Frederik and Mackens, Patrick and Vollrath, Joachim and Coseriu, Bogdan},
  title     = {{Clip4Retrofit}: Enabling Real-Time Image Labeling on Edge Devices via Cross-Architecture {CLIP} Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3868--3876},
}
ZFusion: An Effective Fuser of Camera and 4D Radar for 3D Object Perception in Autonomous Driving: Sheng Yang,

Tong Zhan,

Shichen Qiao,

Jicheng Gong,

Qing Yang,

Jian Wang,

Yanfeng Lu; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name and the "4D"/"3D" tokens in the title are braced so sentence-casing styles keep their capitals.
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Sheng and Zhan, Tong and Qiao, Shichen and Gong, Jicheng and Yang, Qing and Wang, Jian and Lu, Yanfeng},
  title     = {{ZFusion}: An Effective Fuser of Camera and {4D} Radar for {3D} Object Perception in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3807--3816},
}
ReasonDrive: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models: Amirhosein Chahe,

Lifeng Zhou; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name in the title is braced so sentence-casing styles keep its internal capital.
@InProceedings{Chahe_2025_CVPR,
  author    = {Chahe, Amirhosein and Zhou, Lifeng},
  title     = {{ReasonDrive}: Efficient Visual Question Answering for Autonomous Vehicles with Reasoning-Enhanced Small Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3909--3918},
}
ReferGPT: Towards Zero-Shot Referring Multi-Object Tracking: Tzoulio Chamiti,

Leandro Di Bella,

Adrian Munteanu,

Nikos Deligiannis; [pdf] [arXiv]
[bibtex]
% CVPR 2025 Workshops paper. The coined name in the title is braced so sentence-casing styles keep its capitals.
@InProceedings{Chamiti_2025_CVPR,
  author    = {Chamiti, Tzoulio and Di Bella, Leandro and Munteanu, Adrian and Deligiannis, Nikos},
  title     = {{ReferGPT}: Towards Zero-Shot Referring Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3888--3897},
}