Workshop on Open-Domain Reasoning Under Multi-Modal Settings


TEVAD: Improved Video Anomaly Detection With Captions
Weiling Chen,
Keng Teck Ma,
Zi Jian Yew,
Minhoe Hur,
David Aik-Aun Khoo
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2023_CVPR,
  author    = {Chen, Weiling and Ma, Keng Teck and Yew, Zi Jian and Hur, Minhoe and Khoo, David Aik-Aun},
  title     = {{TEVAD}: Improved Video Anomaly Detection With Captions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5549--5559},
}

Curriculum Learning for Data-Efficient Vision-Language Alignment
Tejas Srinivasan,
Xiang Ren,
Jesse Thomason
[pdf] [arXiv]
[bibtex]
@InProceedings{Srinivasan_2023_CVPR,
  author    = {Srinivasan, Tejas and Ren, Xiang and Thomason, Jesse},
  title     = {Curriculum Learning for Data-Efficient Vision-Language Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5619--5624},
}

BMRN: Boundary Matching and Refinement Network for Temporal Moment Localization With Natural Language
Muah Seol,
Jonghee Kim,
Jinyoung Moon
[pdf]
[bibtex]
@InProceedings{Seol_2023_CVPR,
  author    = {Seol, Muah and Kim, Jonghee and Moon, Jinyoung},
  title     = {{BMRN}: Boundary Matching and Refinement Network for Temporal Moment Localization With Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5571--5579},
}

Making the V in Text-VQA Matter
Shamanthak Hegde,
Soumya Jahagirdar,
Shankar Gangisetty
[pdf]
[bibtex]
@InProceedings{Hegde_2023_CVPR,
  author    = {Hegde, Shamanthak and Jahagirdar, Soumya and Gangisetty, Shankar},
  title     = {Making the {V} in {Text-VQA} Matter},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5580--5588},
}

T2V2T: Text-to-Video-to-Text Fusion for Text-to-Video Retrieval
Jonghee Kim,
Youngwan Lee,
Jinyoung Moon
[pdf]
[bibtex]
@InProceedings{Kim_2023_CVPR,
  author    = {Kim, Jonghee and Lee, Youngwan and Moon, Jinyoung},
  title     = {{T2V2T}: Text-to-Video-to-Text Fusion for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5613--5618},
}

CLIP-Guided Vision-Language Pre-Training for Question Answering in 3D Scenes
Maria Parelli,
Alexandros Delitzas,
Nikolas Hars,
Georgios Vlassis,
Sotirios Anagnostidis,
Gregor Bachmann,
Thomas Hofmann
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parelli_2023_CVPR,
  author    = {Parelli, Maria and Delitzas, Alexandros and Hars, Nikolas and Vlassis, Georgios and Anagnostidis, Sotirios and Bachmann, Gregor and Hofmann, Thomas},
  title     = {{CLIP}-Guided Vision-Language Pre-Training for Question Answering in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5607--5612},
}

Weakly Supervised Visual Question Answer Generation
Charani Alampalle,
Shamanthak Hegde,
Soumya Jahagirdar,
Shankar Gangisetty
[pdf]
[bibtex]
@InProceedings{Alampalle_2023_CVPR,
  author    = {Alampalle, Charani and Hegde, Shamanthak and Jahagirdar, Soumya and Gangisetty, Shankar},
  title     = {Weakly Supervised Visual Question Answer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5589--5597},
}

Improving Language-Supervised Object Detection With Linguistic Structure Analysis
Arushi Rai,
Adriana Kovashka
[pdf]
[bibtex]
@InProceedings{Rai_2023_CVPR,
  author    = {Rai, Arushi and Kovashka, Adriana},
  title     = {Improving Language-Supervised Object Detection With Linguistic Structure Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5560--5570},
}

Visual Semantic Relatedness Dataset for Image Captioning
Ahmed Sabir,
Francesc Moreno-Noguer,
Lluís Padró
[pdf]
[bibtex]
@InProceedings{Sabir_2023_CVPR,
  author    = {Sabir, Ahmed and Moreno-Noguer, Francesc and Padr{\'o}, Llu{\'\i}s},
  title     = {Visual Semantic Relatedness Dataset for Image Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5598--5606},
}