Pixel-level Video Understanding in the Wild Challenge


MTA-VPS: A Large-scale Benchmark for Video-Based Person Search
Ding Qi,
Shuguang Dou,
Jian Liu,
Huaixuan Cao,
Hao Zhang,
Dongsheng Jiang,
Cairong Zhao
[pdf]
[bibtex]
% Workshop paper; acronym in the title is braced so sentence-casing styles keep it.
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Ding and Dou, Shuguang and Liu, Jian and Cao, Huaixuan and Zhang, Hao and Jiang, Dongsheng and Zhao, Cairong},
  title     = {{MTA-VPS}: A Large-scale Benchmark for Video-Based Person Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2648--2658},
}

OpenTAD: A Unified Framework and Comprehensive Study of Temporal Action Detection
Shuming Liu,
Chen Zhao,
Fatimah Zohra,
Mattia Soldan,
Alejandro Pardo,
Mengmeng Xu,
Lama Alssum,
Merey Ramazanova,
Juan León Alcázar,
Anthony Cioppa,
Silvio Giancola,
Carlos Hinojosa,
Bernard Ghanem
[pdf] [supp] [arXiv]
[bibtex]
% Workshop paper; accented letters are written as BibTeX special characters
% ({\'a}, {\'o}) so sorting and label generation treat each as a single letter.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuming and Zhao, Chen and Zohra, Fatimah and Soldan, Mattia and Pardo, Alejandro and Xu, Mengmeng and Alssum, Lama and Ramazanova, Merey and Alc{\'a}zar, Juan Le{\'o}n and Cioppa, Anthony and Giancola, Silvio and Hinojosa, Carlos and Ghanem, Bernard},
  title     = {{OpenTAD}: A Unified Framework and Comprehensive Study of Temporal Action Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2625--2635},
}

Adversarially Domain-adaptive Latent Diffusion for Unsupervised Semantic Segmentation
Jongmin Yu,
Zhongtian Sun,
Chi Bene Chen,
Jinhong Yang,
Shan Luo
[pdf] [arXiv]
[bibtex]
% Workshop paper; month uses the predefined macro and pages an en-dash range.
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Jongmin and Sun, Zhongtian and Chen, Chi Bene and Yang, Jinhong and Luo, Shan},
  title     = {Adversarially Domain-adaptive Latent Diffusion for Unsupervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2614--2624},
}

Efficient VideoMAE via Temporal Progressive Training
Xianhang Li,
Peng Wang,
Xinyu Li,
Heng Wang,
Hongru Zhu,
Cihang Xie
[pdf]
[bibtex]
% Workshop paper; the camel-case model name in the title is braced to survive
% sentence-casing styles.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xianhang and Wang, Peng and Li, Xinyu and Wang, Heng and Zhu, Hongru and Xie, Cihang},
  title     = {Efficient {VideoMAE} via Temporal Progressive Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2659--2668},
}

Studying Image Diffusion Features for Zero-Shot Video Object Segmentation
Thanos Delatolas,
Vicky Kalogeiton,
Dim Papadopoulos
[pdf] [arXiv]
[bibtex]
% Workshop paper; month uses the predefined macro and pages an en-dash range.
@InProceedings{Delatolas_2025_CVPR,
  author    = {Delatolas, Thanos and Kalogeiton, Vicky and Papadopoulos, Dim},
  title     = {Studying Image Diffusion Features for Zero-Shot Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2636--2647},
}

PVUW 2025 Challenge Report: Advances in Pixel-level Understanding of Complex Videos in the Wild
Henghui Ding,
Chang Liu,
Nikhila Ravi,
Shuting He,
Yunchao Wei,
Song Bai,
Philip Torr
[pdf] [arXiv]
[bibtex]
% Challenge report; the challenge acronym in the title is braced so
% sentence-casing styles keep it upper-case.
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Henghui and Liu, Chang and Ravi, Nikhila and He, Shuting and Wei, Yunchao and Bai, Song and Torr, Philip},
  title     = {{PVUW} 2025 Challenge Report: Advances in Pixel-level Understanding of Complex Videos in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2669--2678},
}

REEF: Relevance-Aware and Efficient LLM Adapter for Video Understanding
Sakib Reza,
Xiyun Song,
Heather Yu,
Zongfang Lin,
Mohsen Moghaddam,
Octavia Camps
[pdf] [supp] [arXiv]
[bibtex]
% Workshop paper; both acronyms in the title are braced so sentence-casing
% styles keep them upper-case.
@InProceedings{Reza_2025_CVPR,
  author    = {Reza, Sakib and Song, Xiyun and Yu, Heather and Lin, Zongfang and Moghaddam, Mohsen and Camps, Octavia},
  title     = {{REEF}: Relevance-Aware and Efficient {LLM} Adapter for Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2592--2603},
}

M-Adaptor: Text-driven Whole-body Human Motion Generation
Alicia Li,
Xiaodong Chen,
Bohao Liang,
Qian Bao,
Wu Liu
[pdf]
[bibtex]
% Key carries a "_b" suffix: the unsuffixed key Li_2025_CVPR is already used by
% the VideoMAE entry earlier in this file, and BibTeX drops repeated keys.
@InProceedings{Li_2025_CVPR_b,
  author    = {Li, Alicia and Chen, Xiaodong and Liang, Bohao and Bao, Qian and Liu, Wu},
  title     = {{M-Adaptor}: Text-driven Whole-body Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR}) Workshops},
  month     = jun,
  year      = {2025},
  pages     = {2604--2613},
}