Efficient Large Vision Models
EfficientViT-SAM: Accelerated Segment Anything Model Without Performance Loss
[pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhuoyang and Cai, Han and Han, Song},
  title     = {EfficientViT-SAM: Accelerated Segment Anything Model Without Performance Loss},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {7859-7863}
}
On Speculative Decoding for Multimodal Large Language Models
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Gagrani_2024_CVPR,
  author    = {Gagrani, Mukul and Goel, Raghavv and Jeon, Wonseok and Park, Junyoung and Lee, Mingu and Lott, Christopher},
  title     = {On Speculative Decoding for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8285-8289}
}
QAttn: Efficient GPU Kernels for Mixed-precision Vision Transformers
[pdf]
[bibtex]
@InProceedings{Kluska_2024_CVPR,
  author    = {Kluska, Piotr and Castell\'o, Adri\'an and Scheidegger, Florian and Malossi, A. Cristiano I. and Quintana-Ort{\'\i}, Enrique S.},
  title     = {QAttn: Efficient GPU Kernels for Mixed-precision Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3648-3657}
}
SimFreeze: Adaptively Freeze Vision Transformer Encoders with Token Similarity
[pdf]
[bibtex]
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Tianyi and Lee, Chonghan and Narayanan, Vijaykrishnan},
  title     = {SimFreeze: Adaptively Freeze Vision Transformer Encoders with Token Similarity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8266-8270}
}
Lowering PyTorch's Memory Consumption for Selective Differentiation
[pdf]
[supp]
[bibtex]
@InProceedings{Bhatia_2024_CVPR,
  author    = {Bhatia, Samarth and Dangel, Felix},
  title     = {Lowering PyTorch's Memory Consumption for Selective Differentiation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8260-8265}
}
SAM-CLIP: Merging Vision Foundation Models Towards Semantic and Spatial Understanding
[pdf]
[supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Haoxiang and Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Vemulapalli, Raviteja and Farajtabar, Mehrdad and Mehta, Sachin and Rastegari, Mohammad and Tuzel, Oncel and Pouransari, Hadi},
  title     = {SAM-CLIP: Merging Vision Foundation Models Towards Semantic and Spatial Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3635-3647}
}
Adapting the Segment Anything Model During Usage in Novel Situations
[pdf]
[bibtex]
@InProceedings{Schon_2024_CVPR,
  author    = {Sch\"on, Robin and Lorenz, Julian and Ludwig, Katja and Lienhart, Rainer},
  title     = {Adapting the Segment Anything Model During Usage in Novel Situations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3616-3626}
}
PMAFusion: Projection-Based Multi-Modal Alignment for 3D Semantic Occupancy Prediction
[pdf]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Shiyao and Yang, Wenming and Liao, Qingmin},
  title     = {PMAFusion: Projection-Based Multi-Modal Alignment for 3D Semantic Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3627-3634}
}
Adaptive Memory Replay for Continual Learning
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Smith_2024_CVPR,
  author    = {Smith, James Seale and Valkov, Lazar and Halbe, Shaunak and Gutta, Vyshnavi and Feris, Rogerio and Kira, Zsolt and Karlinsky, Leonid},
  title     = {Adaptive Memory Replay for Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3605-3615}
}
Efficient Transformer Adaptation with Soft Token Merging
[pdf]
[supp]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Xin and Fei, Hongliang and Baek, Jinoo},
  title     = {Efficient Transformer Adaptation with Soft Token Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3658-3668}
}
Opportunities for Post-Training Dynamic Layer Sparsity in Large Vision and Language Models
[pdf]
[supp]
[bibtex]
@InProceedings{Dotzel_2024_CVPR,
  author    = {Dotzel, Jordan and Jiang, Carly and Abdelfattah, Mohamed and Zhang, Zhiru},
  title     = {Opportunities for Post-Training Dynamic Layer Sparsity in Large Vision and Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8280-8284}
}
HaLViT: Half of the Weights are Enough
[pdf]
[supp]
[bibtex]
@InProceedings{Koyun_2024_CVPR,
  author    = {Koyun, Onur Can and T\"oreyin, Beh\c{c}et U\u{g}ur},
  title     = {HaLViT: Half of the Weights are Enough},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3669-3678}
}
Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting
[pdf]
[arXiv]
[bibtex]
@InProceedings{Bafghi_2024_CVPR,
  author    = {Bafghi, Reza Akbarian and Harilal, Nidhin and Monteleoni, Claire and Raissi, Maziar},
  title     = {Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {3679-3684}
}
Unleash the Potential of CLIP for Video Highlight Detection
[pdf]
[arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Donghoon and Seo, Seunghyeon and Park, Eunhwan and Nam, Seong-Uk and Kwak, Nojun},
  title     = {Unleash the Potential of CLIP for Video Highlight Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8275-8279}
}
Layered Diffusion Model for One-Shot High Resolution Text-to-Image Synthesis
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Khwaja_2024_CVPR,
  author    = {Khwaja, Emaad and Rashwan, Abdullah and Chen, Ting and Wang, Oliver and Kothawade, Suraj and Li, Yeqing},
  title     = {Layered Diffusion Model for One-Shot High Resolution Text-to-Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = {June},
  year      = {2024},
  pages     = {8271-8274}
}