2nd Workshop on Efficient Large Vision Models
Rethinking the Role of Spatial Mixing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cazenavette_2025_CVPR,
  author    = {Cazenavette, George and Julin, Joel and Lucey, Simon},
  title     = {Rethinking the Role of Spatial Mixing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3219--3228},
}
ViDROP: Video Dense Representation through Spatio-Temporal Sparsity-
[pdf]
[bibtex]@InProceedings{Sameni_2025_CVPR,
  author    = {Sameni, Sepehr and Jenni, Simon and Favaro, Paolo},
  title     = {{ViDROP}: Video Dense Representation through Spatio-Temporal Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3176--3186},
}
VisionCube: 3D-Aware Vision-Language Model for Multi-Step Spatial Reasoning-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feiyang and Luo, Nan and Wu, Wangyu},
  title     = {{VisionCube}: {3D}-Aware Vision-Language Model for Multi-Step Spatial Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3270--3279},
}
CARN: Complexity-Aware Routing Network for Efficient and Adaptive Inference-
[pdf]
[bibtex]@InProceedings{Gaire_2025_CVPR,
  author    = {Gaire, Rebati and Roohi, Arman},
  title     = {{CARN}: Complexity-Aware Routing Network for Efficient and Adaptive Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3318--3326},
}
SmoothCache: A Universal Inference Acceleration Technique for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Joseph and Geddes, Joshua and Guo, Ziyu and Jiang, Haomiao and Nandwana, Mahesh Kumar},
  title     = {{SmoothCache}: A Universal Inference Acceleration Technique for Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3229--3238},
}
From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures-
[pdf]
[supp]
[bibtex]@InProceedings{Munir_2025_CVPR,
  author    = {Munir, Mustafa and Li, Guihong and Rahman, Md Mostafijur and Zhang, Alex and Marculescu, Radu},
  title     = {From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3208--3218},
}
Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gafni_2025_CVPR,
  author    = {Gafni, Tomer and Karnieli, Asaf and Hanani, Yair},
  title     = {Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3259--3269},
}
Prompt Categories Cluster for Weakly Supervised Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wangyu and Qiu, Xianglin and Song, Siqi and Chen, Zhenhong and Huang, Xiaowei and Ma, Fei and Xiao, Jimin},
  title     = {Prompt Categories Cluster for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3198--3207},
}
Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment-
[pdf]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Hanchen and Ma, Rose and Zhu, Jiageng and Mai, Zheda and Abd-Almageed, Wael and Abraham, Zubin},
  title     = {Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3301--3307},
}
U-Shape Mamba: State Space Model for faster diffusion-
[pdf]
[bibtex]@InProceedings{Ergasti_2025_CVPR,
  author    = {Ergasti, Alex and Botti, Filippo and Fontanini, Tomaso and Ferrari, Claudio and Bertozzi, Massimo and Prati, Andrea},
  title     = {U-Shape {Mamba}: State Space Model for faster diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3251--3258},
}
Video, How Do Your Tokens Merge?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pollard_2025_CVPR,
  author    = {Pollard, Sam and Wray, Michael},
  title     = {Video, How Do Your Tokens Merge?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3347--3356},
}
Window Token Concatenation for Efficient Visual Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yifan and Bao, Wentao and Ye, Botao and Tan, Zhen and Chen, Tianlong and Liu, Huan and Kong, Yu},
  title     = {Window Token Concatenation for Efficient Visual Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3187--3197},
}
Effectiveness of Max-Pooling for Fine-Tuning CLIP on Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Zohra_2025_CVPR,
  author    = {Zohra, Fatimah and Zhao, Chen and Liu, Shuming and Ghanem, Bernard},
  title     = {Effectiveness of Max-Pooling for Fine-Tuning {CLIP} on Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3291--3300},
}
SimCache: Similarity Caching for Efficient VLM-based Scene Understanding-
[pdf]
[bibtex]@InProceedings{Selvam_2025_CVPR,
  author    = {Selvam, Surya and Rajendran, Ravi K. and Sankaradas, Murugan and Raghunathan, Anand and Chakradhar, Srimat T.},
  title     = {{SimCache}: Similarity Caching for Efficient {VLM}-based Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3327--3336},
}
DEFT-VTON: Efficient Virtual Try-On with Consistent Generalised H-Transform-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xingzi and Li, Qi and Qiu, Shuwen and Han, Julien and Bouyarmane, Karim},
  title     = {{DEFT-VTON}: Efficient Virtual Try-On with Consistent Generalised H-Transform},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3308--3317},
}
Efficient Image Generation with Variadic Attention Heads-
[pdf]
[supp]
[bibtex]@InProceedings{Walton_2025_CVPR,
  author    = {Walton, Steven and Hassani, Ali and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey},
  title     = {Efficient Image Generation with Variadic Attention Heads},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3239--3250},
}
Mix-QSAM: Mixed-Precision Quantization of the Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Ranjan_2025_CVPR,
  author    = {Ranjan, Navin and Savakis, Andreas},
  title     = {{Mix-QSAM}: Mixed-Precision Quantization of the {Segment Anything Model}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3280--3290},
}
Distilling Normalizing Flows-
[pdf]
[bibtex]@comment{The key below carries a title suffix because the unsuffixed key Walton_2025_CVPR is already used by the "Efficient Image Generation with Variadic Attention Heads" entry in this listing; a repeated key would make this entry unreachable.}
@InProceedings{Walton_2025_CVPR_Distilling,
  author    = {Walton, Steven and Klyukin, Valeriy and Artemev, Maksim and Derkach, Denis and Orlov, Nikita and Shi, Humphrey},
  title     = {Distilling Normalizing Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {3337--3346},
}