CVPR 2025 Open Access Repository

2nd Workshop on Efficient Large Vision Models

Rethinking the Role of Spatial Mixing: George Cazenavette,

Joel Julin,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cazenavette_2025_CVPR,
    author    = {Cazenavette, George and Julin, Joel and Lucey, Simon},
    title     = {Rethinking the Role of Spatial Mixing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3244--3253},
}
ViDROP: Video Dense Representation through Spatio-Temporal Sparsity: Sepehr Sameni,

Simon Jenni,

Paolo Favaro; [pdf]
[bibtex]
@InProceedings{Sameni_2025_CVPR,
    author    = {Sameni, Sepehr and Jenni, Simon and Favaro, Paolo},
    title     = {{ViDROP}: Video Dense Representation through Spatio-Temporal Sparsity},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3201--3211},
}
VisionCube: 3D-Aware Vision-Language Model for Multi-Step Spatial Reasoning: Feiyang Wang,

Nan Luo,

Wangyu Wu; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Feiyang and Luo, Nan and Wu, Wangyu},
    title     = {{VisionCube}: {3D}-Aware Vision-Language Model for Multi-Step Spatial Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3295--3304},
}
CARN: Complexity-Aware Routing Network for Efficient and Adaptive Inference: Rebati Gaire,

Arman Roohi; [pdf]
[bibtex]
@InProceedings{Gaire_2025_CVPR,
    author    = {Gaire, Rebati and Roohi, Arman},
    title     = {{CARN}: Complexity-Aware Routing Network for Efficient and Adaptive Inference},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3343--3351},
}
SmoothCache: A Universal Inference Acceleration Technique for Diffusion Transformers: Joseph Liu,

Joshua Geddes,

Ziyu Guo,

Haomiao Jiang,

Mahesh Kumar Nandwana; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Joseph and Geddes, Joshua and Guo, Ziyu and Jiang, Haomiao and Nandwana, Mahesh Kumar},
    title     = {{SmoothCache}: A Universal Inference Acceleration Technique for Diffusion Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3254--3263},
}
From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures: Mustafa Munir,

Guihong Li,

Md Mostafijur Rahman,

Alex Zhang,

Radu Marculescu; [pdf] [supp]
[bibtex]
@InProceedings{Munir_2025_CVPR,
    author    = {Munir, Mustafa and Li, Guihong and Rahman, Md Mostafijur and Zhang, Alex and Marculescu, Radu},
    title     = {From Data to Design: Leveraging Frequency Statistics for Efficient Neural Network Architectures},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3233--3243},
}
Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference: Tomer Gafni,

Asaf Karnieli,

Yair Hanani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gafni_2025_CVPR,
    author    = {Gafni, Tomer and Karnieli, Asaf and Hanani, Yair},
    title     = {Dual Precision Quantization for Efficient and Accurate Deep Neural Networks Inference},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3284--3294},
}
Prompt Categories Cluster for Weakly Supervised Semantic Segmentation: Wangyu Wu,

Xianglin Qiu,

Siqi Song,

Zhenhong Chen,

Xiaowei Huang,

Fei Ma,

Jimin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Wangyu and Qiu, Xianglin and Song, Siqi and Chen, Zhenhong and Huang, Xiaowei and Ma, Fei and Xiao, Jimin},
    title     = {Prompt Categories Cluster for Weakly Supervised Semantic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3223--3232},
}
Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment: Hanchen Xie,

Rose Ma,

Jiageng Zhu,

Zheda Mai,

Wael Abd-Almageed,

Zubin Abraham; [pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Hanchen and Ma, Rose and Zhu, Jiageng and Mai, Zheda and Abd-Almageed, Wael and Abraham, Zubin},
    title     = {Efficiently Mitigating Video Content Misalignment on Large Vision Model with Time-Series Data Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3326--3332},
}
U-Shape Mamba: State Space Model for faster diffusion: Alex Ergasti,

Filippo Botti,

Tomaso Fontanini,

Claudio Ferrari,

Massimo Bertozzi,

Andrea Prati; [pdf] [arXiv]
[bibtex]
@InProceedings{Ergasti_2025_CVPR,
    author    = {Ergasti, Alex and Botti, Filippo and Fontanini, Tomaso and Ferrari, Claudio and Bertozzi, Massimo and Prati, Andrea},
    title     = {{U-Shape Mamba}: State Space Model for faster diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3276--3283},
}
Video, How Do Your Tokens Merge?: Sam Pollard,

Michael Wray; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pollard_2025_CVPR,
    author    = {Pollard, Sam and Wray, Michael},
    title     = {Video, How Do Your Tokens Merge?},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3372--3381},
}
Window Token Concatenation for Efficient Visual Large Language Models: Yifan Li,

Wentao Bao,

Botao Ye,

Zhen Tan,

Tianlong Chen,

Huan Liu,

Yu Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yifan and Bao, Wentao and Ye, Botao and Tan, Zhen and Chen, Tianlong and Liu, Huan and Kong, Yu},
    title     = {Window Token Concatenation for Efficient Visual Large Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3212--3222},
}
Effectiveness of Max-Pooling for Fine-Tuning CLIP on Videos: Fatimah Zohra,

Chen Zhao,

Shuming Liu,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zohra_2025_CVPR,
    author    = {Zohra, Fatimah and Zhao, Chen and Liu, Shuming and Ghanem, Bernard},
    title     = {Effectiveness of Max-Pooling for Fine-Tuning {CLIP} on Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3316--3325},
}
SimCache: Similarity Caching for Efficient VLM-based Scene Understanding: Surya Selvam,

Ravi K. Rajendran,

Murugan Sankaradas,

Anand Raghunathan,

Srimat T. Chakradhar; [pdf]
[bibtex]
@InProceedings{Selvam_2025_CVPR,
    author    = {Selvam, Surya and Rajendran, Ravi K. and Sankaradas, Murugan and Raghunathan, Anand and Chakradhar, Srimat T.},
    title     = {{SimCache}: Similarity Caching for Efficient {VLM}-based Scene Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3352--3361},
}
DEFT-VTON: Efficient Virtual Try-On with Consistent Generalised H-Transform: Xingzi Xu,

Qi Li,

Shuwen Qiu,

Julien Han,

Karim Bouyarmane; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Xingzi and Li, Qi and Qiu, Shuwen and Han, Julien and Bouyarmane, Karim},
    title     = {{DEFT-VTON}: Efficient Virtual Try-On with Consistent Generalised {H}-Transform},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3333--3342},
}
Efficient Image Generation with Variadic Attention Heads: Steven Walton,

Ali Hassani,

Xingqian Xu,

Zhangyang Wang,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walton_2025_CVPR,
    author    = {Walton, Steven and Hassani, Ali and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey},
    title     = {Efficient Image Generation with Variadic Attention Heads},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3264--3275},
}
Mix-QSAM: Mixed-Precision Quantization of the Segment Anything Model: Navin Ranjan,

Andreas Savakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranjan_2025_CVPR,
    author    = {Ranjan, Navin and Savakis, Andreas},
    title     = {{Mix-QSAM}: Mixed-Precision Quantization of the {Segment Anything Model}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3305--3315},
}
Distilling Normalizing Flows: Steven Walton,

Valeriy Klyukin,

Maksim Artemev,

Denis Derkach,

Nikita Orlov,

Humphrey Shi; [pdf] [arXiv]
[bibtex]
% The key carries a "_b" suffix because "Walton_2025_CVPR" is already taken by
% the "Efficient Image Generation with Variadic Attention Heads" entry earlier
% in this file; a repeated key makes BibTeX discard the later record.
@InProceedings{Walton_2025_CVPR_b,
    author    = {Walton, Steven and Klyukin, Valeriy and Artemev, Maksim and Derkach, Denis and Orlov, Nikita and Shi, Humphrey},
    title     = {Distilling Normalizing Flows},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
    month     = jun,
    year      = {2025},
    pages     = {3362--3371},
}