5th Workshop on Closing the Loop Between Vision and Language
Vision-Language Models Performing Zero-Shot Tasks Exhibit Disparities Between Gender Groups-
[pdf]
[bibtex]@InProceedings{Hall_2023_ICCV,
  author    = {Hall, Melissa and Gustafson, Laura and Adcock, Aaron and Misra, Ishan and Ross, Candace},
  title     = {Vision-Language Models Performing Zero-Shot Tasks Exhibit Disparities Between Gender Groups},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2778--2785},
}
Multimodal Neurons in Pretrained Text-Only Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Schwettmann_2023_ICCV,
  author    = {Schwettmann, Sarah and Chowdhury, Neil and Klein, Samuel and Bau, David and Torralba, Antonio},
  title     = {Multimodal Neurons in Pretrained Text-Only Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2862--2867},
}
Explaining Vision and Language Through Graphs of Events in Space and Time-
[pdf]
[arXiv]
[bibtex]@InProceedings{Masala_2023_ICCV,
  author    = {Masala, Mihai and Cudlenco, Nicolae and Rebedea, Traian and Leordeanu, Marius},
  title     = {Explaining Vision and Language Through Graphs of Events in Space and Time},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2826--2831},
}
Sparse Linear Concept Discovery Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Panousis_2023_ICCV,
  author    = {Panousis, Konstantinos Panagiotis and Ienco, Dino and Marcos, Diego},
  title     = {Sparse Linear Concept Discovery Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2767--2771},
}
LLaViLo: Boosting Video Moment Retrieval via Adapter-Based Multimodal Modeling-
[pdf]
[bibtex]@InProceedings{Ma_2023_ICCV,
  author    = {Ma, Kaijing and Zang, Xianghao and Feng, Zerun and Fang, Han and Ban, Chao and Wei, Yuhan and He, Zhongjiang and Li, Yongxiang and Sun, Hao},
  title     = {{LLaViLo}: Boosting Video Moment Retrieval via Adapter-Based Multimodal Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2798--2803},
}
An Empirical Study of the Effect of Video Encoders on Temporal Video Grounding-
[pdf]
[bibtex]@InProceedings{De_la_Jara_2023_ICCV,
  author    = {De la Jara, Ignacio M. and Rodriguez-Opazo, Cristian and Marrese-Taylor, Edison and Bravo-Marquez, Felipe},
  title     = {An Empirical Study of the Effect of Video Encoders on Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2850--2855},
}
PatFig: Generating Short and Long Captions for Patent Figures-
[pdf]
[arXiv]
[bibtex]@InProceedings{Aubakirova_2023_ICCV,
  author    = {Aubakirova, Dana and Gerdes, Kim and Liu, Lufei},
  title     = {{PatFig}: Generating Short and Long Captions for Patent Figures},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2843--2849},
}
A Cross-Dataset Study on the Brazilian Sign Language Translation-
[pdf]
[bibtex]@InProceedings{de_Avellar_Sarmento_2023_ICCV,
  author    = {de Avellar Sarmento, Amanda Hellen and Ponti, Moacir Antonelli},
  title     = {A Cross-Dataset Study on the {Brazilian Sign Language} Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2816--2820},
}
ECO: Ensembling Context Optimization for Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Agnolucci_2023_ICCV,
  author    = {Agnolucci, Lorenzo and Baldrati, Alberto and Todino, Francesco and Becattini, Federico and Bertini, Marco and Del Bimbo, Alberto},
  title     = {{ECO}: Ensembling Context Optimization for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2811--2815},
}
Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Reichman_2023_ICCV,
  author    = {Reichman, Benjamin and Heck, Larry},
  title     = {Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2837--2842},
}
Towards Vision-Language Mechanistic Interpretability: A Causal Tracing Tool for BLIP-
[pdf]
[arXiv]
[bibtex]@InProceedings{Palit_2023_ICCV,
  author    = {Palit, Vedant and Pandey, Rohan and Arora, Aryaman and Liang, Paul Pu},
  title     = {Towards Vision-Language Mechanistic Interpretability: A Causal Tracing Tool for {BLIP}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2856--2861},
}
Mapping Memes to Words for Multimodal Hateful Meme Classification-
[pdf]
[bibtex]@InProceedings{Burbi_2023_ICCV,
  author    = {Burbi, Giovanni and Baldrati, Alberto and Agnolucci, Lorenzo and Bertini, Marco and Del Bimbo, Alberto},
  title     = {Mapping Memes to Words for Multimodal Hateful Meme Classification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2832--2836},
}
BiLMa: Bidirectional Local-Matching for Text-based Person Re-identification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fujii_2023_ICCV,
  author    = {Fujii, Takuro and Tarashima, Shuhei},
  title     = {{BiLMa}: Bidirectional Local-Matching for Text-based Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2786--2790},
}
ProVLA: Compositional Image Search with Progressive Vision-Language Alignment and Multimodal Fusion-
[pdf]
[bibtex]@InProceedings{Hu_2023_ICCV,
  author    = {Hu, Zhizhang and Zhu, Xinliang and Tran, Son and Vidal, Ren{\'e} and Dhua, Arnab},
  title     = {{ProVLA}: Compositional Image Search with Progressive Vision-Language Alignment and Multimodal Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2772--2777},
}
Context-VQA: Towards Context-Aware and Purposeful Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Naik_2023_ICCV,
  author    = {Naik, Nandita and Potts, Christopher and Kreiss, Elisa},
  title     = {{Context-VQA}: Towards Context-Aware and Purposeful Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2821--2825},
}
Zero-Shot and Few-Shot Video Question Answering with Multi-Modal Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Engin_2023_ICCV,
  author    = {Engin, Deniz and Avrithis, Yannis},
  title     = {Zero-Shot and Few-Shot Video Question Answering with Multi-Modal Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2804--2810},
}
Alignment and Generation Adapter for Efficient Video-Text Understanding-
[pdf]
[bibtex]@InProceedings{Fang_2023_ICCV,
  author    = {Fang, Han and Yang, Zhifei and Wei, Yuhan and Zang, Xianghao and Ban, Chao and Feng, Zerun and He, Zhongjiang and Li, Yongxiang and Sun, Hao},
  title     = {Alignment and Generation Adapter for Efficient Video-Text Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {2791--2797},
}