Efficient Large Vision Models


QAttn: Efficient GPU Kernels for Mixed-precision Vision Transformers
Piotr Kluska,
Adrián Castelló,
Florian Scheidegger,
A. Cristiano I. Malossi,
Enrique S. Quintana-Ortí
[pdf]
[bibtex]
% Accented letters are brace-wrapped so BibTeX treats each one as a single letter; braces in the title keep acronym casing under sentence-casing styles.
@InProceedings{Kluska_2024_CVPR,
  author    = {Kluska, Piotr and Castell{\'o}, Adri{\'a}n and Scheidegger, Florian and Malossi, A. Cristiano I. and Quintana-Ort{\'\i}, Enrique S.},
  title     = {{QAttn}: Efficient {GPU} Kernels for Mixed-precision Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3648--3657},
}

SAM-CLIP: Merging Vision Foundation Models Towards Semantic and Spatial Understanding
Haoxiang Wang,
Pavan Kumar Anasosalu Vasu,
Fartash Faghri,
Raviteja Vemulapalli,
Mehrdad Farajtabar,
Sachin Mehta,
Mohammad Rastegari,
Oncel Tuzel,
Hadi Pouransari
[pdf] [supp]
[bibtex]
% Braces in the title and booktitle keep acronym casing under sentence-casing styles; month uses the predefined macro.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Haoxiang and Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Vemulapalli, Raviteja and Farajtabar, Mehrdad and Mehta, Sachin and Rastegari, Mohammad and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{SAM-CLIP}: Merging Vision Foundation Models Towards Semantic and Spatial Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3635--3647},
}

Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting
Reza Akbarian Bafghi,
Nidhin Harilal,
Claire Monteleoni,
Maziar Raissi
[pdf] [arXiv]
[bibtex]
% Braces around ViTs keep its mixed casing under sentence-casing styles; month uses the predefined macro.
@InProceedings{Bafghi_2024_CVPR,
  author    = {Bafghi, Reza Akbarian and Harilal, Nidhin and Monteleoni, Claire and Raissi, Maziar},
  title     = {Parameter Efficient Fine-tuning of Self-supervised {ViTs} without Catastrophic Forgetting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3679--3684},
}

Adapting the Segment Anything Model During Usage in Novel Situations
Robin Schön,
Julian Lorenz,
Katja Ludwig,
Rainer Lienhart
[pdf]
[bibtex]
% The umlaut is brace-wrapped so BibTeX treats it as a single letter; the model name in the title is brace-protected against style recasing.
@InProceedings{Schon_2024_CVPR,
  author    = {Sch{\"o}n, Robin and Lorenz, Julian and Ludwig, Katja and Lienhart, Rainer},
  title     = {Adapting the {Segment Anything Model} During Usage in Novel Situations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3616--3626},
}

PMAFusion: Projection-Based Multi-Modal Alignment for 3D Semantic Occupancy Prediction
Shiyao Li,
Wenming Yang,
Qingmin Liao
[pdf]
[bibtex]
% Braces around PMAFusion and 3D keep their casing under sentence-casing styles; month uses the predefined macro.
@InProceedings{Li_2024_CVPR,
  author    = {Li, Shiyao and Yang, Wenming and Liao, Qingmin},
  title     = {{PMAFusion}: Projection-Based Multi-Modal Alignment for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3627--3634},
}

HaLViT: Half of the Weights are Enough
Onur Can Koyun,
Behçet Uğur Töreyin
[pdf] [supp]
[bibtex]
% Each accented letter is brace-wrapped so BibTeX treats it as a single letter; braces around HaLViT keep its mixed casing.
@InProceedings{Koyun_2024_CVPR,
  author    = {Koyun, Onur Can and T{\"o}reyin, Beh{\c{c}}et U{\u{g}}ur},
  title     = {{HaLViT}: Half of the Weights are Enough},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3669--3678},
}

Adaptive Memory Replay for Continual Learning
James Seale Smith,
Lazar Valkov,
Shaunak Halbe,
Vyshnavi Gutta,
Rogerio Feris,
Zsolt Kira,
Leonid Karlinsky
[pdf] [supp] [arXiv]
[bibtex]
% Month uses the predefined macro and the page range uses an en-dash; booktitle acronyms are brace-protected.
@InProceedings{Smith_2024_CVPR,
  author    = {Smith, James Seale and Valkov, Lazar and Halbe, Shaunak and Gutta, Vyshnavi and Feris, Rogerio and Kira, Zsolt and Karlinsky, Leonid},
  title     = {Adaptive Memory Replay for Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3605--3615},
}

Efficient Transformer Adaptation with Soft Token Merging
Xin Yuan,
Hongliang Fei,
Jinoo Baek
[pdf] [supp]
[bibtex]
% Month uses the predefined macro and the page range uses an en-dash; booktitle acronyms are brace-protected.
@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Xin and Fei, Hongliang and Baek, Jinoo},
  title     = {Efficient Transformer Adaptation with Soft Token Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2024},
  pages     = {3658--3668},
}