Computer Vision with Small Data: Beyond Scale -- Toward Data-Efficient Dynamically-Aware Video Intelligence
Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Orlova_2026_CVPR,
    author    = {Orlova, Svetlana and Cavagnero, Niccol{\`o} and Dubbelman, Gijs},
    title     = {Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8784--8793}
}
Focusing Attention in Self-Supervised Learning for Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Tibrewal_2026_CVPR,
    author    = {Tibrewal, Vansh and Thomson, Bart R and Hugelshofer, Michael and Richter, Henning and Perona, Pietro and Kondapaneni, Neehar and Marks, Markus},
    title     = {Focusing Attention in Self-Supervised Learning for Action Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8812--8821}
}
ManyHeight: Towards Multi-Frame Self-Supervised Road Surface Reconstruction-
[pdf]
[bibtex]@InProceedings{Koumpis_2026_CVPR,
    author    = {Koumpis, Angelos and Elazab, Gasser and Hellwich, Olaf},
    title     = {{ManyHeight}: Towards Multi-Frame Self-Supervised Road Surface Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8775--8783}
}
Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Amraee_2026_CVPR,
    author    = {Amraee, Somaieh and Singh, Ashutosh and McCullough, Aston and Scheithauer, Mindy and Goodwin, Matthew and Ostadabbas, Sarah},
    title     = {Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8733--8743}
}
Unlocking the Potential of Grounding DINO in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zanyi and Li, Fan and Jiang, Dengyang and Li, Liuzhuozheng and Zhong, Yunhua and Dai, Guang and Wang, Mengmeng},
    title     = {Unlocking the Potential of {Grounding DINO} in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8822--8831}
}
Narrative Aligned Long Form Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2026_CVPR,
    author    = {Jain, Rahul and Doshi, Keval and Uzkent, Burak and Kessler, Garin},
    title     = {Narrative Aligned Long Form Video Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8765--8774}
}
Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Spencer_2026_CVPR,
    author    = {Spencer, Lincoln and Wang, Song and Chen, Chen},
    title     = {Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8805--8811}
}
SRL-CLIP: Efficient CLIP Video Adaptation via Structured Semantic Role Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR,
    author    = {Singh, Darshan and Tapaswi, Makarand and Khan, Zeeshan},
    title     = {{SRL-CLIP}: Efficient {CLIP} Video Adaptation via Structured Semantic Role Labels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8794--8804}
}
PiVoT: Proactive Video Templates for Enhancing Video Task Performance-
[pdf]
[supp]
[bibtex]@InProceedings{Asnani_2026_CVPR,
    author    = {Asnani, Vishal and Agarwal, Shruti and Liu, Xiaoming},
    title     = {{PiVoT}: Proactive Video Templates for Enhancing Video Task Performance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8744--8754}
}
HORNet: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR,
    author    = {Bai, Xiangyu and Galoaa, Bishoy and Ostadabbas, Sarah},
    title     = {{HORNet}: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {8755--8764}
}

