CVPR 2026 Open Access Repository

Computer Vision with Small Data: Beyond Scale -- Toward Data-Efficient Dynamically-Aware Video Intelligence

Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models: Svetlana Orlova,

Niccolò Cavagnero,

Gijs Dubbelman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Orlova_2026_CVPR,
  author    = {Orlova, Svetlana and Cavagnero, Niccol{\`o} and Dubbelman, Gijs},
  title     = {Towards Data-Efficient Video Pre-training with Frozen Image Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8784--8793}
}
Focusing Attention in Self-Supervised Learning for Action Recognition: Vansh Tibrewal,

Bart R Thomson,

Michael Hugelshofer,

Henning Richter,

Pietro Perona,

Neehar Kondapaneni,

Markus Marks; [pdf] [supp]
[bibtex]
@InProceedings{Tibrewal_2026_CVPR,
  author    = {Tibrewal, Vansh and Thomson, Bart R and Hugelshofer, Michael and Richter, Henning and Perona, Pietro and Kondapaneni, Neehar and Marks, Markus},
  title     = {Focusing Attention in Self-Supervised Learning for Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8812--8821}
}
ManyHeight: Towards Multi-Frame Self-Supervised Road Surface Reconstruction: Angelos Koumpis,

Gasser Elazab,

Olaf Hellwich; [pdf]
[bibtex]
@InProceedings{Koumpis_2026_CVPR,
  author    = {Koumpis, Angelos and Elazab, Gasser and Hellwich, Olaf},
  title     = {{ManyHeight}: Towards Multi-Frame Self-Supervised Road Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8775--8783}
}
Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach: Somaieh Amraee,

Ashutosh Singh,

Aston McCullough,

Mindy Scheithauer,

Matthew Goodwin,

Sarah Ostadabbas; [pdf] [supp]
[bibtex]
@InProceedings{Amraee_2026_CVPR,
  author    = {Amraee, Somaieh and Singh, Ashutosh and McCullough, Aston and Scheithauer, Mindy and Goodwin, Matthew and Ostadabbas, Sarah},
  title     = {Toward Automated Behavior Understanding in Autism: A Zero-Shot Vision-Language Model Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8733--8743}
}
Unlocking the Potential of Grounding DINO in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization: Zanyi Wang,

Fan Li,

Dengyang Jiang,

Liuzhuozheng Li,

Yunhua Zhong,

Guang Dai,

Mengmeng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zanyi and Li, Fan and Jiang, Dengyang and Li, Liuzhuozheng and Zhong, Yunhua and Dai, Guang and Wang, Mengmeng},
  title     = {Unlocking the Potential of {Grounding DINO} in Videos: Parameter-Efficient Adaptation for Limited-Data Spatial-Temporal Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8822--8831}
}
Narrative Aligned Long Form Video Question Answering: Rahul Jain,

Keval Doshi,

Burak Uzkent,

Garin Kessler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Rahul and Doshi, Keval and Uzkent, Burak and Kessler, Garin},
  title     = {Narrative Aligned Long Form Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8765--8774}
}
Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models: Lincoln Spencer,

Song Wang,

Chen Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Spencer_2026_CVPR,
  author    = {Spencer, Lincoln and Wang, Song and Chen, Chen},
  title     = {Data-Efficient Surgical Phase Segmentation in Small-Incision Cataract Surgery: A Controlled Study of Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8805--8811}
}
SRL-CLIP: Efficient CLIP Video Adaptation via Structured Semantic Role Labels: Darshan Singh,

Makarand Tapaswi,

Zeeshan Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Tapaswi, Makarand and Khan, Zeeshan},
  title     = {{SRL-CLIP}: Efficient {CLIP} Video Adaptation via Structured Semantic Role Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8794--8804}
}
PiVoT: Proactive Video Templates for Enhancing Video Task Performance: Vishal Asnani,

Shruti Agarwal,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Asnani_2026_CVPR,
  author    = {Asnani, Vishal and Agarwal, Shruti and Liu, Xiaoming},
  title     = {{PiVoT}: Proactive Video Templates for Enhancing Video Task Performance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8744--8754}
}
HORNet: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models: Xiangyu Bai,

Bishoy Galoaa,

Sarah Ostadabbas; [pdf] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xiangyu and Galoaa, Bishoy and Ostadabbas, Sarah},
  title     = {{HORNet}: Task-Guided Frame Selection for Video Question Answering with Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8755--8764}
}