ICCV 2025 Open Access Repository

Workshop on AI-driven Skilled Activity Understanding, Assessment & Feedback Generation

InstaPose: Scene-Aware Pose Recommendation via Vision Transformers and Diversity-Optimized Reranking: Yu Ji,

I-Han Hsiao; [pdf]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yu and Hsiao, I-Han},
  title     = {{InstaPose}: Scene-Aware Pose Recommendation via Vision Transformers and Diversity-Optimized Reranking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2747--2753},
}
Learning Robust Aligned Representations Across Multiple Visual Modalities in Human Action Recognition: David Lerch,

Bastian Rothenburger,

Zeyun Zhong,

Manuel Martin,

Frederik Diederichs,

Rainer Stiefelhagen; [pdf]
[bibtex]
@InProceedings{Lerch_2025_ICCV,
  author    = {Lerch, David and Rothenburger, Bastian and Zhong, Zeyun and Martin, Manuel and Diederichs, Frederik and Stiefelhagen, Rainer},
  title     = {Learning Robust Aligned Representations Across Multiple Visual Modalities in Human Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2700--2710},
}
EgoOops: A Dataset for Mistake Action Detection from Egocentric Videos referring to Procedural Texts: Yuto Haneji,

Taichi Nishimura,

Hirotaka Kameko,

Keisuke Shirai,

Tomoya Yoshida,

Keiya Kajimura,

Koki Yamamoto,

Taiyu Cui,

Tomohiro Nishimoto,

Shinsuke Mori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haneji_2025_ICCV,
  author    = {Haneji, Yuto and Nishimura, Taichi and Kameko, Hirotaka and Shirai, Keisuke and Yoshida, Tomoya and Kajimura, Keiya and Yamamoto, Koki and Cui, Taiyu and Nishimoto, Tomohiro and Mori, Shinsuke},
  title     = {{EgoOops}: A Dataset for Mistake Action Detection from Egocentric Videos referring to Procedural Texts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2711--2721},
}
EHWGesture - A dataset for multimodal understanding of clinical gestures: Gianluca Amprimo,

Alberto Ancilotto,

Alessandro Savino,

Fabio Quazzolo,

Claudia Ferraris,

Gabriella Olmo,

Elisabetta Farella,

Stefano Di Carlo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amprimo_2025_ICCV,
  author    = {Amprimo, Gianluca and Ancilotto, Alberto and Savino, Alessandro and Quazzolo, Fabio and Ferraris, Claudia and Olmo, Gabriella and Farella, Elisabetta and Di Carlo, Stefano},
  title     = {{EHWGesture} -- A dataset for multimodal understanding of clinical gestures},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2722--2731},
}
Assessing the Quality of Soccer Shots from Single-Camera Video with Vision-Language Models and Motion Features: Filip Noworolnik,

Joanna Jaworek-Korjakowska; [pdf]
[bibtex]
@InProceedings{Noworolnik_2025_ICCV,
  author    = {Noworolnik, Filip and Jaworek-Korjakowska, Joanna},
  title     = {Assessing the Quality of Soccer Shots from Single-Camera Video with Vision-Language Models and Motion Features},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2754--2761},
}
EASG-Bench: Video Q&A Benchmark with Egocentric Action Scene Graphs: Ivan Rodin,

Tz-Ying Wu,

Kyle Min,

Sharath Nittur Sridhar,

Antonino Furnari,

Subarna Tripathi,

Giovanni Maria Farinella; [pdf] [arXiv]
[bibtex]
@InProceedings{Rodin_2025_ICCV,
  author    = {Rodin, Ivan and Wu, Tz-Ying and Min, Kyle and Sridhar, Sharath Nittur and Furnari, Antonino and Tripathi, Subarna and Farinella, Giovanni Maria},
  title     = {{EASG-Bench}: Video {Q\&A} Benchmark with Egocentric Action Scene Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2732--2737},
}
CricTAL: Introducing Temporal Activity Localisation using pose estimation to identify key phases in cricket batting for downstream Action Quality Assessment: Tevin Moodley,

Dustin van der Haar; [pdf]
[bibtex]
@InProceedings{Moodley_2025_ICCV,
  author    = {Moodley, Tevin and van der Haar, Dustin},
  title     = {{CricTAL}: Introducing Temporal Activity Localisation using pose estimation to identify key phases in cricket batting for downstream Action Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {2738--2746},
}