Pixel-level Video Understanding in the Wild Challenge
Geometry-Guided Camera Motion Understanding in VideoLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoan and Musunuri, Sri Harsha and Su, Guan-Ming},
  title     = {Geometry-Guided Camera Motion Understanding in {VideoLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8423--8433},
}
VISTA: A Benchmark for Spatio-Temporal Interaction Understanding in Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Aparcedo_2026_CVPR,
  author    = {Aparcedo, Alejandro and Kumar, Akash and Garg, Aaryan and Pham, Dalton and Chen, Wen-Kai and Bharadwaj, Anirudh and Chadha, Aman and Rawat, Yogesh},
  title     = {{VISTA}: A Benchmark for Spatio-Temporal Interaction Understanding in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8411--8422},
}
A Hyperbolic Perspective on Hierarchical Structure in Object-Centric Scene Representations-
[pdf]
[bibtex]@InProceedings{Madan_2026_CVPR,
  author    = {Madan, Neelu and Vidal, Alex Pujol and Mogelmose, Andreas and Escalera, Sergio and Nasrollahi, Kamal and Taylor, Graham W. and Moeslund, Thomas B.},
  title     = {A Hyperbolic Perspective on Hierarchical Structure in Object-Centric Scene Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8476--8485},
}
VVitCutLER:Towards Unsupervised Object Detection and Segmentation in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhijing and Hashmi, Khurram Azeem and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{VVitCutLER}: Towards Unsupervised Object Detection and Segmentation in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8466--8475},
}
Phrase-Instance Alignment for Generalized Referring Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, E-Ro and Le, Hieu and Samaras, Dimitris and Ryoo, Michael S.},
  title     = {Phrase-Instance Alignment for Generalized Referring Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8486--8495},
}
Report of the 5th PVUW Challenge: Towards More Diverse Modalities in Pixel-Level Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chang and Ding, Henghui and Ravi, Nikhila and Wei, Yunchao and He, Shuting and Bai, Song and Torr, Philip and Cao, Leilei and Zhang, Jinrong and Miao, Deshui and He, Xusheng and Gong, Dengxian and Wang, Zhiyu and Gao, Mingqi and Hong, Jihwan},
  title     = {Report of the 5th {PVUW} Challenge: Towards More Diverse Modalities in Pixel-Level Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8456--8465},
}
A Paradigm Shift: Fully End-to-End Training for Temporal Sentence Grounding in Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Allen and Liu, Qi and Liu, Kun and Liu, Xinchen and Liu, Wu},
  title     = {A Paradigm Shift: Fully End-to-End Training for Temporal Sentence Grounding in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8434--8444},
}
Pixel-level Scene Understanding in One Token: Visual States Need What-is-Where Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seokmin and Lee, Yunghee and Pak, Byeonghyun and Woo, Byeongju},
  title     = {Pixel-level Scene Understanding in One Token: Visual States Need What-is-Where Composition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8445--8455},
}
Semantic Alignment in Hyperbolic Space for Open-Vocabulary Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Truong_2026_CVPR,
  author    = {Truong, Hoang M. and Nguyen-Truong, Hai and Huynh, Dang},
  title     = {Semantic Alignment in Hyperbolic Space for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8496--8505},
}
De-Supervision in Camouflaged Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Alessandrini_2026_CVPR,
  author    = {Alessandrini, Luca and Rizzo, Antonino Maria and Magri, Luca and Boracchi, Giacomo and Arrigoni, Federica},
  title     = {De-Supervision in Camouflaged Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {8401--8410},
}

