9th AI City Challenge
Multimodal and Multi-task Fusion for Spatial Reasoning-
[pdf]
[bibtex]@InProceedings{Dang_2025_ICCV,
  author    = {Dang, Van-Minh and Le, Thanh-Sach},
  title     = {Multimodal and Multi-task Fusion for Spatial Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5351--5357}
}
SmolRGPT: Efficient Spatial Reasoning for Warehouse Environments with 600M Parameters-
[pdf]
[arXiv]
[bibtex]@InProceedings{Traore_2025_ICCV,
  author    = {Traore, Abadrahmane and Hervet, Eric and Couturier, Andy},
  title     = {{SmolRGPT}: Efficient Spatial Reasoning for Warehouse Environments with {600M} Parameters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5271--5279}
}
Efficient and Distortion-Aware Fisheye Object Detection for Edge Devices-
[pdf]
[bibtex]@InProceedings{Gia_2025_ICCV,
  author    = {Gia, Bao Tran and Khanh, Tuong Bui Cong and Le Thi Thanh, Tam and Trong, Hien Ho and Do, Tien and Ngo, Thanh Duc and Le, Duy-Dinh and Satoh, Shin'ichi},
  title     = {Efficient and Distortion-Aware Fisheye Object Detection for Edge Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5410--5416}
}
MCBLT: Multi-Camera Multi-Object 3D Tracking in Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yizhou and Meinhardt, Tim and Cetintas, Orcun and Yang, Cheng-Yen and Pusegaonkar, Sameer and Missaoui, Benjamin and Biswas, Sujit and Tang, Zheng and Leal-Taixe, Laura},
  title     = {{MCBLT}: Multi-Camera Multi-Object {3D} Tracking in Long Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5245--5254}
}
TinyGiantVLM: A Lightweight Vision-Language Architecture for Spatial Reasoning under Resource Constraints-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ly_2025_ICCV,
  author    = {Ly, Vinh-Thuan and Truong, Hoang M. and Nguyen, Xuan-Huong},
  title     = {{TinyGiantVLM}: A Lightweight Vision-Language Architecture for Spatial Reasoning under Resource Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5449--5456}
}
DepthTrack: Cluster Meets BEV for Multi-Camera Multi-Target 3D Tracking-
[pdf]
[bibtex]@InProceedings{Tran_2025_ICCV,
  author    = {Tran, Tai Huu-Phuong and Tran, Duong Nguyen-Ngoc and Huynh, Ngoc Doan-Minh and Tran, Chi Dai and Pham, Long Hoang and Ho, Quoc Pham-Nam and Nguyen, Huy-Hung and Vu, Duong Khac and Jeon, Hyung-Min and Jeon, Hyung-Joon and Phan, Son Hong and Le Ba Khanh, Trinh and Jeon, Jae Wook},
  title     = {{DepthTrack}: Cluster Meets {BEV} for Multi-Camera Multi-Target {3D} Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5289--5298}
}
Real-Time Object Detection on Edge Devices: A Fisheye Specific DFINE-
[pdf]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Jaewoo and Lee, Jihae and Yong, Yunjeong},
  title     = {Real-Time Object Detection on Edge Devices: A Fisheye Specific {DFINE}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5417--5426}
}
TrafficInternVL: Spatially-Guided Fine-Tuning with Caption Refinement for Fine-Grained Traffic Safety Captioning and Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Phimsiri_2025_ICCV,
  author    = {Phimsiri, Sasin and Sunpawatr, Sarut and Cherdchusakulchai, Riu and Kiawjak, Pornprom and Tosawadi, Teepakorn and Tungjitnob, Suchat and Trairattanapa, Visarut and Vatathanavaro, Supawit and Kudisthalert, Wasu and Utintu, Chaitat and Saetan, Worawit and Kongsawat, Nathamon and Borisuitsawat, Phawat and Mahakijdechachai, Kasisdis and Su-Inn, Nitipan and Thamwiwatthana, Ek and Suttichaya, Vasin},
  title     = {{TrafficInternVL}: Spatially-Guided Fine-Tuning with Caption Refinement for Fine-Grained Traffic Safety Captioning and Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5299--5306}
}
Warehouse Spatial Question Answering with LLM Agent: 1st Place Solution of the 9th AI City Challenge Track 3-
[pdf]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hsiang-Wei and Kim, Pyongkun and Cheng, Jen-Hao and Chen, Kuang-Ming and Yang, Cheng-Yen and Alattar, Bahaa and Lin, Yi-Ru and Kim, Sangwon and Kim, Kwangju and Huang, Chung-I and Hwang, Jenq-Neng},
  title     = {Warehouse Spatial Question Answering with {LLM} Agent: 1st Place Solution of the 9th {AI City Challenge} Track 3},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5224--5228}
}
Enhanced Fisheye Object Detection via YOLO Ensemble Learning and Weighted Box Fusion-
[pdf]
[bibtex]@InProceedings{Tsai_2025_ICCV,
  author    = {Tsai, Chun-Ming and Wu, Li-Li and Chen, Tsung-Yu},
  title     = {Enhanced Fisheye Object Detection via {YOLO} Ensemble Learning and Weighted Box Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5237--5244}
}
A Real-time Vehicle Detection Pipeline with Data-centric Enhancements and Multi-stage DETR Distillation-
[pdf]
[bibtex]@InProceedings{Nguyen_2025_ICCV,
  author    = {Nguyen, Huy Minh Nhat and Pham, Hieu Dinh Trung and Le, Khang Minh and Nguyen, Cuong Tuan},
  title     = {A Real-time Vehicle Detection Pipeline with Data-centric Enhancements and Multi-stage {DETR} Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5382--5389}
}
Multi-Agent Cooperation for Traffic Safety Description and Analysis-
[pdf]
[bibtex]@InProceedings{Kachhadiya_2025_ICCV,
  author    = {Kachhadiya, Ridham and Patil, Dhanishtha and Anastasiu, David C.},
  title     = {Multi-Agent Cooperation for Traffic Safety Description and Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5427--5435}
}
The 9th AI City Challenge-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Zheng and Wang, Shuo and Anastasiu, David C. and Chang, Ming-Ching and Sharma, Anuj and Kong, Quan and Kobori, Norimasa and Gochoo, Munkhjargal and Batnasan, Ganzorig and Otgonbold, Munkh-Erdene and Alnajjar, Fady and Hsieh, Jun-Wei and Kornuta, Tomasz and Li, Xiaolong and Zhao, Yilin and Zhang, Han and Radhakrishnan, Subhashree and Jain, Arihant and Kumar, Ratnesh and Murali, Vidya N. and Wang, Yuxing and Pusegaonkar, Sameer Satish and Wang, Yizhou and Biswas, Sujit and Wu, Xunlei and Zheng, Zhedong and Chakraborty, Pranamesh and Chellappa, Rama},
  title     = {The 9th {AI City Challenge}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5467--5476}
}
EKI-GAN: Context-Aware Vehicle Trajectory Forecasting with Vehicle Factors and Environmental Information at Signalized Intersections-
[pdf]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Chuheng and Wu, Guoyuan and Barth, Matthew},
  title     = {{EKI-GAN}: Context-Aware Vehicle Trajectory Forecasting with Vehicle Factors and Environmental Information at Signalized Intersections},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5315--5324}
}
Online 3D Multi-Camera Perception through Robust 2D Tracking and Depth-based Late Aggregation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Vu-Minh_2025_ICCV,
  author    = {Vu-Minh, Le and Thao-Anh, Tran and Huy, Do Duc and Canh, Do Xuan and Ninh, Huong and Tran, Hai},
  title     = {Online {3D} Multi-Camera Perception through Robust {2D} Tracking and Depth-based Late Aggregation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5390--5400}
}
Task-Specific Dual-Model Framework for Comprehensive Traffic Safety Video Description and Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kyem_2025_ICCV,
  author    = {Kyem, Blessing Agyei and Owor, Neema J. and Danyo, Andrews and Asamoah, Joshua K. and Denteh, Eugene and Muturi, Tanner and Dontoh, Anthony and Adu-Gyamfi, Yaw and Aboah, Armstrong},
  title     = {Task-Specific Dual-Model Framework for Comprehensive Traffic Safety Video Description and Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5325--5333}
}
STER-VLM: Spatio-Temporal With Enhanced Reference Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Nguyen-Nhu_2025_ICCV,
  author    = {Nguyen-Nhu, Tinh-Anh and Minh, Triet Dao Hoang and To-Thanh, Dat and Le-Gia, Phuc and Vo-Lan, Tuan and Nguyen, Tien-Huy},
  title     = {{STER-VLM}: Spatio-Temporal With Enhanced Reference Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5457--5466}
}
Multi-Camera 3D Object Tracking via 3D Point Clouds and Re-Identification-
[pdf]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jaewon and Kim, Heecheol and Lee, Doohee and Lee, Kanghee},
  title     = {Multi-Camera {3D} Object Tracking via {3D} Point Clouds and Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5358--5365}
}
A Unified Detection Pipeline for Robust Object Detection in Fisheye-Based Traffic Surveillance-
[pdf]
[bibtex]@InProceedings{Owor_2025_ICCV,
  author    = {Owor, Neema Jakisa and Asamoah, Joshua Kofi and Muturi, Tanner Wambui and Owor, Anneliese Jakisa and Kyem, Blessing Agyei and Danyo, Andrews and Adu-Gyamfi, Yaw and Aboah, Armstrong},
  title     = {A Unified Detection Pipeline for Robust Object Detection in Fisheye-Based Traffic Surveillance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5255--5262}
}
TrafficInternVL: Understanding Traffic Scenarios with Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Hsiu-Fu and Yang, Ya-Ting and Chen, Yung-Ter and Chou, I-Fan},
  title     = {{TrafficInternVL}: Understanding Traffic Scenarios with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5229--5236}
}
Prompt-Guided Spatial Understanding with RGB-D Transformers for Fine-Grained Object Relation Reasoning-
[pdf]
[bibtex]@InProceedings{Muturi_2025_ICCV,
  author    = {Muturi, Tanner W. and Kyem, Blessing Agyei and Asamoah, Joshua Kofi and Owor, Neema Jakisa and Dzinyela, Richard and Danyo, Andrews and Adu-Gyamfi, Yaw and Aboah, Armstrong},
  title     = {Prompt-Guided Spatial Understanding with {RGB-D} Transformers for Fine-Grained Object Relation Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5280--5288}
}
A Lightweight and Data-Centric Framework for Real-Time Object Detection in Fisheye Camera-
[pdf]
[bibtex]@InProceedings{Vinh_2025_ICCV,
  author    = {Vinh, An To and Vinh, Nguyen Mai and Si, Nguyen Luong and Quoc, Duy Do and Khanh, Duy Tran and Hoang, Nguyen Nguyen and Do, Tien and Ngo, Thanh Duc and Le, Duy-Dinh and Satoh, Shin'ichi},
  title     = {A Lightweight and Data-Centric Framework for Real-Time Object Detection in Fisheye Camera},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5441--5448}
}
Domain-Aware Enhancements to Vision-Language Models for Urban Traffic Safety Question Answering-
[pdf]
[bibtex]@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Vu Thanh Dat and Tran, Tuan Huy and Dong, Gang Thep and Chu, Ngoc Chien and Vu, Huan and Nguyen, Tien Cuong},
  title     = {Domain-Aware Enhancements to Vision-Language Models for Urban Traffic Safety Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5366--5374}
}
Hierarchical Multi-Modal Fusion for Roadside VRU Detection: Method Complementarity Under Sparse Label Constraints-
[pdf]
[bibtex]@InProceedings{Wei_2025_ICCV_b,
  author    = {Wei, Chuheng and Zhang, Ziyan and Liu, Haishan and Wu, Guoyuan and Barth, Matthew},
  title     = {Hierarchical Multi-Modal Fusion for Roadside {VRU} Detection: Method Complementarity Under Sparse Label Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5343--5350}
}
Data Augmentation Is All You Need For Robust Fisheye Object Detection-
[pdf]
[bibtex]@InProceedings{Pham_2025_ICCV,
  author    = {Pham, Long Hoang and Ho, Quoc Pham-Nam and Vu, Duong Khac and Nguyen, Huy-Hung and Tran, Chi Dai and Tran, Duong Nguyen-Ngoc and Tran, Tai Huu-Phuong and Huynh, Ngoc Doan-Minh and Jeon, Hyung Joon and Jeon, Hyung-Min and Phan, Son Hong and Le Kha Ba, Trinh and Jeon, Jae Wook},
  title     = {Data Augmentation Is All You Need For Robust Fisheye Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5334--5342}
}
TrafficVILA: A Multimodal Framework for Traffic Safety Description and Analysis-
[pdf]
[bibtex]@InProceedings{Park_2025_ICCV_b,
  author    = {Park, Beomseok and Yang, Wanzhao and Yuan, Sifan and Anwar, Syed Muhammad and Marsic, Ivan},
  title     = {{TrafficVILA}: A Multimodal Framework for Traffic Safety Description and Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5401--5409}
}
Boosting Fisheye Detection with Augmentations and Ensembles-
[pdf]
[bibtex]@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Chengzhi and Li, Jing and Huang, Yangyang and Lin, Zhixin and Yao, Yue and Wang, Jianping},
  title     = {Boosting Fisheye Detection with Augmentations and Ensembles},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5436--5440}
}
Augmentation, Distillation and Optimization: A Practical Pipeline for Fisheye Object Detection on Edge Devices-
[pdf]
[bibtex]@InProceedings{Duong_2025_ICCV,
  author    = {Duong, Viet Hung and Truong, Van Duy and Dinh, Duy Khanh and Vu, Huan and Van Luong, Thien and Nguyen, Tien Cuong},
  title     = {Augmentation, Distillation and Optimization: A Practical Pipeline for Fisheye Object Detection on Edge Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5263--5270}
}
TrafficVILA: Scaling Vision-Language Models to High-Resolution Video Understanding for Traffic Safety Analysis-
[pdf]
[bibtex]@InProceedings{Pervaiz_2025_ICCV,
  author    = {Pervaiz, Zaid B. and Cha, Seunghwan and Gulati, Rohan and Jhuria, Monika and Praveen, Varun and Kornuta, Tomasz and Lu, Yao and Murali, Vidya},
  title     = {{TrafficVILA}: Scaling Vision-Language Models to High-Resolution Video Understanding for Traffic Safety Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5307--5314}
}
VGCRTrack: Multi-Camera 3D Tracking with View-Aware Geometric Center Refinement-
[pdf]
[bibtex]@InProceedings{Phan_2025_ICCV,
  author    = {Phan, Trieu-Huy and Dinh, Duc-Duy and Huynh, Thanh-Phong and Le, Quoc-Thinh and Dang, Huy-Hoang and Tran, Vu-Hoang and Luu, Van-Tin and Huang, Ching-Chun},
  title     = {{VGCRTrack}: Multi-Camera {3D} Tracking with View-Aware Geometric Center Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {5375--5381}
}