CVPR 2025 Open Access Repository

Pixel-level Video Understanding in the Wild Challenge

MTA-VPS: A Large-scale Benchmark for Video-Based Person Search: Ding Qi,

Shuguang Dou,

Jian Liu,

Huaixuan Cao,

Hao Zhang,

Dongsheng Jiang,

Cairong Zhao; [pdf]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Ding and Dou, Shuguang and Liu, Jian and Cao, Huaixuan and Zhang, Hao and Jiang, Dongsheng and Zhao, Cairong},
  title     = {{MTA-VPS}: A Large-scale Benchmark for Video-Based Person Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2673--2683},
}
OpenTAD: A Unified Framework and Comprehensive Study of Temporal Action Detection: Shuming Liu,

Chen Zhao,

Fatimah Zohra,

Mattia Soldan,

Alejandro Pardo,

Mengmeng Xu,

Lama Alssum,

Merey Ramazanova,

Juan León Alcázar,

Anthony Cioppa,

Silvio Giancola,

Carlos Hinojosa,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
% Accented letters are wrapped in their own brace groups so BibTeX treats each as a single letter when sorting and building labels.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuming and Zhao, Chen and Zohra, Fatimah and Soldan, Mattia and Pardo, Alejandro and Xu, Mengmeng and Alssum, Lama and Ramazanova, Merey and Alc{\'a}zar, Juan Le{\'o}n and Cioppa, Anthony and Giancola, Silvio and Hinojosa, Carlos and Ghanem, Bernard},
  title     = {{OpenTAD}: A Unified Framework and Comprehensive Study of Temporal Action Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2650--2660},
}
Adversarially Domain-adaptive Latent Diffusion for Unsupervised Semantic Segmentation: Jongmin Yu,

Zhongtian Sun,

Chi Bene Chen,

Jinhong Yang,

Shan Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Jongmin and Sun, Zhongtian and Chen, Chi Bene and Yang, Jinhong and Luo, Shan},
  title     = {Adversarially Domain-adaptive Latent Diffusion for Unsupervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2639--2649},
}
Efficient VideoMAE via Temporal Progressive Training: Xianhang Li,

Peng Wang,

Xinyu Li,

Heng Wang,

Hongru Zhu,

Cihang Xie; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xianhang and Wang, Peng and Li, Xinyu and Wang, Heng and Zhu, Hongru and Xie, Cihang},
  title     = {Efficient {VideoMAE} via Temporal Progressive Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2684--2693},
}
Studying Image Diffusion Features for Zero-Shot Video Object Segmentation: Thanos Delatolas,

Vicky Kalogeiton,

Dim Papadopoulos; [pdf] [arXiv]
[bibtex]
@InProceedings{Delatolas_2025_CVPR,
  author    = {Delatolas, Thanos and Kalogeiton, Vicky and Papadopoulos, Dim},
  title     = {Studying Image Diffusion Features for Zero-Shot Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2661--2672},
}
PVUW 2025 Challenge Report: Advances in Pixel-level Understanding of Complex Videos in the Wild: Henghui Ding,

Chang Liu,

Nikhila Ravi,

Shuting He,

Yunchao Wei,

Song Bai,

Philip Torr; [pdf] [arXiv]
[bibtex]
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Henghui and Liu, Chang and Ravi, Nikhila and He, Shuting and Wei, Yunchao and Bai, Song and Torr, Philip},
  title     = {{PVUW} 2025 Challenge Report: Advances in Pixel-level Understanding of Complex Videos in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2694--2703},
}
REEF: Relevance-Aware and Efficient LLM Adapter for Video Understanding: Sakib Reza,

Xiyun Song,

Heather Yu,

Zongfang Lin,

Mohsen Moghaddam,

Octavia Camps; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reza_2025_CVPR,
  author    = {Reza, Sakib and Song, Xiyun and Yu, Heather and Lin, Zongfang and Moghaddam, Mohsen and Camps, Octavia},
  title     = {{REEF}: Relevance-Aware and Efficient {LLM} Adapter for Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2617--2628},
}
M-Adaptor: Text-driven Whole-body Human Motion Generation: Alicia Li,

Xiaodong Chen,

Bohao Liang,

Qian Bao,

Wu Liu; [pdf]
[bibtex]
% Key carries a "_b" suffix because Li_2025_CVPR is already used by an earlier entry in this listing; a repeated key makes BibTeX drop the later entry.
@InProceedings{Li_2025_CVPR_b,
  author    = {Li, Alicia and Chen, Xiaodong and Liang, Bohao and Bao, Qian and Liu, Wu},
  title     = {{M-Adaptor}: Text-driven Whole-body Human Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2629--2638},
}