2nd Workshop on Efficient Large Vision Models


Rethinking the Role of Spatial Mixing
George Cazenavette,
Joel Julin,
Simon Lucey
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cazenavette_2025_CVPR,
  author    = {Cazenavette, George and Julin, Joel and Lucey, Simon},
  title     = {Rethinking the Role of Spatial Mixing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3219--3228},
}

ViDROP: Video Dense Representation through Spatio-Temporal Sparsity
Sepehr Sameni,
Simon Jenni,
Paolo Favaro
[pdf]
[bibtex]
@InProceedings{Sameni_2025_CVPR,
  author    = {Sameni, Sepehr and Jenni, Simon and Favaro, Paolo},
  title     = {{ViDROP}: Video Dense Representation through Spatio-Temporal Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3176--3186},
}

VisionCube: 3D-Aware Vision-Language Model for Multi-Step Spatial Reasoning
Feiyang Wang,
Nan Luo,
Wangyu Wu
[pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feiyang and Luo, Nan and Wu, Wangyu},
  title     = {{VisionCube}: {3D}-Aware Vision-Language Model for Multi-Step Spatial Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3270--3279},
}

CARN: Complexity-Aware Routing Network for Efficient and Adaptive Inference
Rebati Gaire,
Arman Roohi
[pdf]
[bibtex]
@InProceedings{Gaire_2025_CVPR,
  author    = {Gaire, Rebati and Roohi, Arman},
  title     = {{CARN}: Complexity-Aware Routing Network for Efficient and Adaptive Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3318--3326},
}

SmoothCache: A Universal Inference Acceleration Technique for Diffusion Transformers
Joseph Liu,
Joshua Geddes,
Ziyu Guo,
Haomiao Jiang,
Mahesh Kumar Nandwana
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Joseph and Geddes, Joshua and Guo, Ziyu and Jiang, Haomiao and Nandwana, Mahesh Kumar},
  title     = {{SmoothCache}: A Universal Inference Acceleration Technique for Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3229--3238},
}

From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures
Mustafa Munir,
Guihong Li,
Md Mostafijur Rahman,
Alex Zhang,
Radu Marculescu
[pdf] [supp]
[bibtex]
@InProceedings{Munir_2025_CVPR,
  author    = {Munir, Mustafa and Li, Guihong and Rahman, Md Mostafijur and Zhang, Alex and Marculescu, Radu},
  title     = {From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3208--3218},
}

Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference
Tomer Gafni,
Asaf Karnieli,
Yair Hanani
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gafni_2025_CVPR,
  author    = {Gafni, Tomer and Karnieli, Asaf and Hanani, Yair},
  title     = {Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3259--3269},
}

Prompt Categories Cluster for Weakly Supervised Semantic Segmentation
Wangyu Wu,
Xianglin Qiu,
Siqi Song,
Zhenhong Chen,
Xiaowei Huang,
Fei Ma,
Jimin Xiao
[pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wangyu and Qiu, Xianglin and Song, Siqi and Chen, Zhenhong and Huang, Xiaowei and Ma, Fei and Xiao, Jimin},
  title     = {Prompt Categories Cluster for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3198--3207},
}

Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment
Hanchen Xie,
Rose Ma,
Jiageng Zhu,
Zheda Mai,
Wael Abd-Almageed,
Zubin Abraham
[pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Hanchen and Ma, Rose and Zhu, Jiageng and Mai, Zheda and Abd-Almageed, Wael and Abraham, Zubin},
  title     = {Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3301--3307},
}

U-Shape Mamba: State Space Model for faster diffusion
Alex Ergasti,
Filippo Botti,
Tomaso Fontanini,
Claudio Ferrari,
Massimo Bertozzi,
Andrea Prati
[pdf]
[bibtex]
@InProceedings{Ergasti_2025_CVPR,
  author    = {Ergasti, Alex and Botti, Filippo and Fontanini, Tomaso and Ferrari, Claudio and Bertozzi, Massimo and Prati, Andrea},
  title     = {U-Shape {Mamba}: State Space Model for faster diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3251--3258},
}

Video, How Do Your Tokens Merge?
Sam Pollard,
Michael Wray
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pollard_2025_CVPR,
  author    = {Pollard, Sam and Wray, Michael},
  title     = {Video, How Do Your Tokens Merge?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3347--3356},
}

Window Token Concatenation for Efficient Visual Large Language Models
Yifan Li,
Wentao Bao,
Botao Ye,
Zhen Tan,
Tianlong Chen,
Huan Liu,
Yu Kong
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yifan and Bao, Wentao and Ye, Botao and Tan, Zhen and Chen, Tianlong and Liu, Huan and Kong, Yu},
  title     = {Window Token Concatenation for Efficient Visual Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3187--3197},
}

Effectiveness of Max-Pooling for Fine-Tuning CLIP on Videos
Fatimah Zohra,
Chen Zhao,
Shuming Liu,
Bernard Ghanem
[pdf] [supp]
[bibtex]
@InProceedings{Zohra_2025_CVPR,
  author    = {Zohra, Fatimah and Zhao, Chen and Liu, Shuming and Ghanem, Bernard},
  title     = {Effectiveness of Max-Pooling for Fine-Tuning {CLIP} on Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3291--3300},
}

SimCache: Similarity Caching for Efficient VLM-based Scene Understanding
Surya Selvam,
Ravi K. Rajendran,
Murugan Sankaradas,
Anand Raghunathan,
Srimat T. Chakradhar
[pdf]
[bibtex]
@InProceedings{Selvam_2025_CVPR,
  author    = {Selvam, Surya and Rajendran, Ravi K. and Sankaradas, Murugan and Raghunathan, Anand and Chakradhar, Srimat T.},
  title     = {{SimCache}: Similarity Caching for Efficient {VLM}-based Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3327--3336},
}

DEFT-VTON: Efficient Virtual Try-On with Consistent Generalised H-Transform
Xingzi Xu,
Qi Li,
Shuwen Qiu,
Julien Han,
Karim Bouyarmane
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xingzi and Li, Qi and Qiu, Shuwen and Han, Julien and Bouyarmane, Karim},
  title     = {{DEFT-VTON}: Efficient Virtual Try-On with Consistent Generalised {H-Transform}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3308--3317},
}

Efficient Image Generation with Variadic Attention Heads
Steven Walton,
Ali Hassani,
Xingqian Xu,
Zhangyang Wang,
Humphrey Shi
[pdf] [supp]
[bibtex]
@InProceedings{Walton_2025_CVPR,
  author    = {Walton, Steven and Hassani, Ali and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey},
  title     = {Efficient Image Generation with Variadic Attention Heads},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3239--3250},
}

Mix-QSAM: Mixed-Precision Quantization of the Segment Anything Model
Navin Ranjan,
Andreas Savakis
[pdf] [supp]
[bibtex]
@InProceedings{Ranjan_2025_CVPR,
  author    = {Ranjan, Navin and Savakis, Andreas},
  title     = {{Mix-QSAM}: Mixed-Precision Quantization of the {Segment Anything Model}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3280--3290},
}

Distilling Normalizing Flows
Steven Walton,
Valeriy Klyukin,
Maksim Artemev,
Denis Derkach,
Nikita Orlov,
Humphrey Shi
[pdf]
[bibtex]
% Key carries a "_b" suffix: the unsuffixed key is already used by the
% "Efficient Image Generation with Variadic Attention Heads" entry, and
% BibTeX rejects a repeated key, making this entry impossible to cite.
@InProceedings{Walton_2025_CVPR_b,
  author    = {Walton, Steven and Klyukin, Valeriy and Artemev, Maksim and Derkach, Denis and Orlov, Nikita and Shi, Humphrey},
  title     = {Distilling Normalizing Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3337--3346},
}