-
[pdf]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses "--"; month uses the standard "oct" macro;
% acronyms in booktitle are brace-protected so styles cannot recase them.
@InProceedings{Schiatti_2025_ICCV,
  author    = {Schiatti, Lucia and Vallarino, Guido and Lopez, Sabrina Megan and Kuo, Yen-Ling and Moro, Matteo and Zhang, Mengmi and Gori, Monica and Del Bue, Alessio and Katz, Boris and Barbu, Andrei},
  title     = {Exploring Human-Model Alignment in Visual Social Attention During Help-and-Hinder Social Interaction Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4913--4922},
}
Exploring Human-Model Alignment in Visual Social Attention During Help-and-Hinder Social Interaction Classification
Abstract
Artificial Neural Networks (ANNs) are considered the best models of human vision in many visual tasks. However, there is still a significant gap between humans and ANNs in visual tasks involving social perception and cognition. In this paper, we investigate the alignment between humans and different ANN architectures, i.e., convolutional neural networks (CNNs) and transformers, during the classification of social interactions from videos. Specifically, we provide a novel dataset of videos and human gaze data during the classification of help and hinder social interactions, and we evaluate the human-model alignment in terms of classification accuracy and visual attention. We show that our proposed dataset and experimental protocol can enable comparison between different models' alignment to humans in terms of both static and dynamic visual attention. Our results suggest that a higher classification accuracy may be correlated with a higher similarity of the model's attention to human gaze-based saliency maps. However, a stronger alignment of human-model attention is not always predictive of a better classification.
Related Material
