Computer Vision with Small Data: Beyond Scale -- Toward Data-Efficient Dynamically-Aware Video Intelligence


Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models
Svetlana Orlova,
Niccolò Cavagnero,
Gijs Dubbelman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Orlova_2026_CVPR,
  author    = {Orlova, Svetlana and Cavagnero, Niccol{\`o} and Dubbelman, Gijs},
  title     = {Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8784--8793}
}

Focusing Attention in Self-Supervised Learning for Action Recognition
Vansh Tibrewal,
Bart R Thomson,
Michael Hugelshofer,
Henning Richter,
Pietro Perona,
Neehar Kondapaneni,
Markus Marks
[pdf] [supp]
[bibtex]
@InProceedings{Tibrewal_2026_CVPR,
  author    = {Tibrewal, Vansh and Thomson, Bart R and Hugelshofer, Michael and Richter, Henning and Perona, Pietro and Kondapaneni, Neehar and Marks, Markus},
  title     = {Focusing Attention in Self-Supervised Learning for Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8812--8821}
}

ManyHeight: Towards Multi-Frame Self-Supervised Road Surface Reconstruction
Angelos Koumpis,
Gasser Elazab,
Olaf Hellwich
[pdf]
[bibtex]
@InProceedings{Koumpis_2026_CVPR,
  author    = {Koumpis, Angelos and Elazab, Gasser and Hellwich, Olaf},
  title     = {{ManyHeight}: Towards Multi-Frame Self-Supervised Road Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8775--8783}
}

Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach
Somaieh Amraee,
Ashutosh Singh,
Aston McCullough,
Mindy Scheithauer,
Matthew Goodwin,
Sarah Ostadabbas
[pdf] [supp]
[bibtex]
@InProceedings{Amraee_2026_CVPR,
  author    = {Amraee, Somaieh and Singh, Ashutosh and McCullough, Aston and Scheithauer, Mindy and Goodwin, Matthew and Ostadabbas, Sarah},
  title     = {Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8733--8743}
}

Unlocking the Potential of Grounding DINO in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization
Zanyi Wang,
Fan Li,
Dengyang Jiang,
Liuzhuozheng Li,
Yunhua Zhong,
Guang Dai,
Mengmeng Wang
[pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zanyi and Li, Fan and Jiang, Dengyang and Li, Liuzhuozheng and Zhong, Yunhua and Dai, Guang and Wang, Mengmeng},
  title     = {Unlocking the Potential of {Grounding} {DINO} in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8822--8831}
}

Narrative Aligned Long Form Video Question Answering
Rahul Jain,
Keval Doshi,
Burak Uzkent,
Garin Kessler
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Rahul and Doshi, Keval and Uzkent, Burak and Kessler, Garin},
  title     = {Narrative Aligned Long Form Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8765--8774}
}

Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models
Lincoln Spencer,
Song Wang,
Chen Chen
[pdf] [arXiv]
[bibtex]
@InProceedings{Spencer_2026_CVPR,
  author    = {Spencer, Lincoln and Wang, Song and Chen, Chen},
  title     = {Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8805--8811}
}

SRL-CLIP: Efficient CLIP Video Adaptation via Structured Semantic Role Labels
Darshan Singh,
Makarand Tapaswi,
Zeeshan Khan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Tapaswi, Makarand and Khan, Zeeshan},
  title     = {{SRL-CLIP}: Efficient {CLIP} Video Adaptation via Structured Semantic Role Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8794--8804}
}

PiVoT: Proactive Video Templates for Enhancing Video Task Performance
Vishal Asnani,
Shruti Agarwal,
Xiaoming Liu
[pdf] [supp]
[bibtex]
@InProceedings{Asnani_2026_CVPR,
  author    = {Asnani, Vishal and Agarwal, Shruti and Liu, Xiaoming},
  title     = {{PiVoT}: Proactive Video Templates for Enhancing Video Task Performance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8744--8754}
}

HORNet: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models
Xiangyu Bai,
Bishoy Galoaa,
Sarah Ostadabbas
[pdf] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xiangyu and Galoaa, Bishoy and Ostadabbas, Sarah},
  title     = {{HORNet}: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8755--8764}
}