What is Next in Multimodal Foundation Models?
Coarse to Fine Frame Selection for Online Open-Ended Video Question Answering-
[pdf]
[bibtex]@InProceedings{Nuthalapati_2023_ICCV,
    author    = {Nuthalapati, Vidyaranya and Tunga, Anirudh},
    title     = {Coarse to Fine Frame Selection for Online Open-Ended Video Question Answering},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {353--361},
}
Retrieving-to-Answer: Zero-Shot Video Question Answering with Frozen Large Language Models-
[pdf]
[bibtex]@InProceedings{Pan_2023_ICCV,
    author    = {Pan, Junting and Lin, Ziyi and Ge, Yuying and Zhu, Xiatian and Zhang, Renrui and Wang, Yi and Qiao, Yu and Li, Hongsheng},
    title     = {Retrieving-to-Answer: Zero-Shot Video Question Answering with Frozen Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {272--283},
}
Video-and-Language (VidL) models and their cognitive relevance-
[pdf]
[supp]
[bibtex]@InProceedings{Zonneveld_2023_ICCV,
    author    = {Zonneveld, Anne and Gatt, Albert and Calixto, Iacer},
    title     = {Video-and-Language ({VidL}) models and their cognitive relevance},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {325--338},
}
Video Attribute Prototype Network: A New Perspective for Zero-Shot Video Classification-
[pdf]
[bibtex]@InProceedings{Wang_2023_ICCV,
    author    = {Wang, Bo and Zhao, Kaili and Zhao, Hongyang and Pu, Shi and Xiao, Bo and Guo, Jun},
    title     = {Video Attribute Prototype Network: A New Perspective for Zero-Shot Video Classification},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {315--324},
}
Interaction-Aware Prompting for Zero-Shot Spatio-Temporal Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2023_ICCV,
    author    = {Huang, Wei-Jhe and Yeh, Jheng-Hsien and Chen, Min-Hung and Faure, Gueter Josmy and Lai, Shang-Hong},
    title     = {Interaction-Aware Prompting for Zero-Shot Spatio-Temporal Action Detection},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {284--293},
}
ClipCrop: Conditioned Cropping Driven by Vision-Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhong_2023_ICCV,
    author    = {Zhong, Zhihang and Cheng, Mingxi and Wu, Zhirong and Yuan, Yuhui and Zheng, Yinqiang and Li, Ji and Hu, Han and Lin, Stephen and Sato, Yoichi and Sato, Imari},
    title     = {{ClipCrop}: Conditioned Cropping Driven by Vision-Language Model},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {294--304},
}
Towards an Exhaustive Evaluation of Vision-Language Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Salin_2023_ICCV,
    author    = {Salin, Emmanuelle and Ayache, St{\'e}phane and Favre, Benoit},
    title     = {Towards an Exhaustive Evaluation of Vision-Language Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {339--352},
}
Enhancing CLIP with GPT-4: Harnessing Visual Descriptions as Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Maniparambil_2023_ICCV,
    author    = {Maniparambil, Mayug and Vorster, Chris and Molloy, Derek and Murphy, Noel and McGuinness, Kevin and O'Connor, Noel E.},
    title     = {Enhancing {CLIP} with {GPT-4}: Harnessing Visual Descriptions as Prompts},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {262--271},
}
Painter: Teaching Auto-Regressive Language Models to Draw Sketches-
[pdf]
[arXiv]
[bibtex]@InProceedings{Pourreza_2023_ICCV,
    author    = {Pourreza, Reza and Bhattacharyya, Apratim and Panchal, Sunny and Lee, Mingu and Madan, Pulkit and Memisevic, Roland},
    title     = {Painter: Teaching Auto-Regressive Language Models to Draw Sketches},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = oct,
    year      = {2023},
    pages     = {305--314},
}