Workshop on Open-Domain Reasoning Under Multi-Modal Settings
TEVAD: Improved Video Anomaly Detection With Captions
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2023_CVPR,
  author    = {Chen, Weiling and Ma, Keng Teck and Yew, Zi Jian and Hur, Minhoe and Khoo, David Aik-Aun},
  title     = {{TEVAD}: Improved Video Anomaly Detection With Captions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5549--5559},
}
Curriculum Learning for Data-Efficient Vision-Language Alignment
[pdf]
[arXiv]
[bibtex]@InProceedings{Srinivasan_2023_CVPR,
  author    = {Srinivasan, Tejas and Ren, Xiang and Thomason, Jesse},
  title     = {Curriculum Learning for Data-Efficient Vision-Language Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5619--5624},
}
BMRN: Boundary Matching and Refinement Network for Temporal Moment Localization With Natural Language
[pdf]
[bibtex]@InProceedings{Seol_2023_CVPR,
  author    = {Seol, Muah and Kim, Jonghee and Moon, Jinyoung},
  title     = {{BMRN}: Boundary Matching and Refinement Network for Temporal Moment Localization With Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5571--5579},
}
Making the V in Text-VQA Matter
[pdf]
[bibtex]@InProceedings{Hegde_2023_CVPR,
  author    = {Hegde, Shamanthak and Jahagirdar, Soumya and Gangisetty, Shankar},
  title     = {Making the {V} in {Text-VQA} Matter},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5580--5588},
}
T2V2T: Text-to-Video-to-Text Fusion for Text-to-Video Retrieval
[pdf]
[bibtex]@InProceedings{Kim_2023_CVPR,
  author    = {Kim, Jonghee and Lee, Youngwan and Moon, Jinyoung},
  title     = {{T2V2T}: Text-to-Video-to-Text Fusion for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5613--5618},
}
CLIP-Guided Vision-Language Pre-Training for Question Answering in 3D Scenes
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parelli_2023_CVPR,
  author    = {Parelli, Maria and Delitzas, Alexandros and Hars, Nikolas and Vlassis, Georgios and Anagnostidis, Sotirios and Bachmann, Gregor and Hofmann, Thomas},
  title     = {{CLIP}-Guided Vision-Language Pre-Training for Question Answering in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5607--5612},
}
Weakly Supervised Visual Question Answer Generation
[pdf]
[bibtex]@InProceedings{Alampalle_2023_CVPR,
  author    = {Alampalle, Charani and Hegde, Shamanthak and Jahagirdar, Soumya and Gangisetty, Shankar},
  title     = {Weakly Supervised Visual Question Answer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5589--5597},
}
Improving Language-Supervised Object Detection With Linguistic Structure Analysis
[pdf]
[bibtex]@InProceedings{Rai_2023_CVPR,
  author    = {Rai, Arushi and Kovashka, Adriana},
  title     = {Improving Language-Supervised Object Detection With Linguistic Structure Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5560--5570},
}
Visual Semantic Relatedness Dataset for Image Captioning
[pdf]
[bibtex]@InProceedings{Sabir_2023_CVPR,
  author    = {Sabir, Ahmed and Moreno-Noguer, Francesc and Padr{\'o}, Llu{\'\i}s},
  title     = {Visual Semantic Relatedness Dataset for Image Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {5598--5606},
}