Vision-and-Language Algorithmic Reasoning Workshop
MMTF: Multi-Modal Temporal Fusion for Commonsense Video Question Answering
[pdf]
[supp]
[bibtex]@InProceedings{Ahmad_2023_ICCV,
  author    = {Ahmad, Mobeen and Park, Geonwoo and Park, Dongchan and Park, Sanguk},
  title     = {{MMTF}: Multi-Modal Temporal Fusion for Commonsense Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4657--4662},
}
Understanding Video Scenes Through Text: Insights from Text-Based Video Question Answering
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jahagirdar_2023_ICCV,
  author    = {Jahagirdar, Soumya and Mathew, Minesh and Karatzas, Dimosthenis and Jawahar, C. V.},
  title     = {Understanding Video Scenes Through Text: Insights from Text-Based Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4646--4650},
}
CLIP-Decoder : ZeroShot Multilabel Classification Using Multimodal CLIP Aligned Representations
[pdf]
[supp]
[bibtex]@InProceedings{Ali_2023_ICCV,
  author    = {Ali, Muhammad and Khan, Salman},
  title     = {{CLIP-Decoder} : {ZeroShot} Multilabel Classification Using Multimodal {CLIP} Aligned Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4675--4679},
}
Uni-NLX: Unifying Textual Explanations for Vision and Vision-Language Tasks
[pdf]
[supp]
[bibtex]@InProceedings{Sammani_2023_ICCV,
  author    = {Sammani, Fawaz and Deligiannis, Nikos},
  title     = {{Uni-NLX}: Unifying Textual Explanations for Vision and Vision-Language Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4634--4639},
}
Language-Enhanced RNR-Map: Querying Renderable Neural Radiance Field Maps with Natural Language
[pdf]
[supp]
[bibtex]@InProceedings{Taioli_2023_ICCV,
  author    = {Taioli, Francesco and Cunico, Federico and Girella, Federico and Bologna, Riccardo and Farinelli, Alessandro and Cristani, Marco},
  title     = {Language-Enhanced {RNR-Map}: Querying Renderable Neural Radiance Field Maps with Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4669--4674},
}
SelfGraphVQA: A Self-Supervised Graph Neural Network for Scene-Based Question Answering
[pdf]
[supp]
[bibtex]@InProceedings{de_Oliveira_Souza_2023_ICCV,
  author    = {de Oliveira Souza, Bruno Cesar and Aasan, Marius and Pedrini, Helio and Rivera, Adin Ramirez},
  title     = {{SelfGraphVQA}: A Self-Supervised Graph Neural Network for Scene-Based Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4640--4645},
}
Iterative Robust Visual Grounding with Masked Reference Based Centerpoint Supervision
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2023_ICCV,
  author    = {Li, Menghao and Wang, Chunlei and Feng, Wenquan and Lyu, Shuchang and Cheng, Guangliang and Li, Xiangtai and Liu, Binghao and Zhao, Qi},
  title     = {Iterative Robust Visual Grounding with Masked Reference Based Centerpoint Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4651--4656},
}
Pointing out Human Answer Mistakes in a Goal-Oriented Visual Dialogue
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oshima_2023_ICCV,
  author    = {Oshima, Ryosuke and Shinagawa, Seitaro and Tsunashima, Hideki and Feng, Qi and Morishima, Shigeo},
  title     = {Pointing out Human Answer Mistakes in a Goal-Oriented Visual Dialogue},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4663--4668},
}
What If the TV Was Off? Examining Counterfactual Reasoning Abilities of Multi-Modal Language Models
[pdf]
[bibtex]@InProceedings{Zhang_2023_ICCV,
  author    = {Zhang, Letian and Zhai, Xiaotong and Zhao, Zhongkai and Wen, Xin and Zhao, Bingchen},
  title     = {What If the {TV} Was Off? {Examining} Counterfactual Reasoning Abilities of Multi-Modal Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {4629--4633},
}