6th Multi-Modal Learning and Applications Workshop
Multi Event Localization by Audio-Visual Fusion With Omnidirectional Camera and Microphone Array
[pdf]
[bibtex]@InProceedings{Zheng_2023_CVPR,
  author    = {Zheng, Wenru and Yoshihashi, Ryota and Kawakami, Rei and Sato, Ikuro and Kanezaki, Asako},
  title     = {Multi Event Localization by Audio-Visual Fusion With Omnidirectional Camera and Microphone Array},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2566--2574}
}
Exposing and Mitigating Spurious Correlations for Cross-Modal Retrieval
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2023_CVPR,
  author    = {Kim, Jae Myung and Koepke, A. Sophia and Schmid, Cordelia and Akata, Zeynep},
  title     = {Exposing and Mitigating Spurious Correlations for Cross-Modal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2585--2595}
}
Adapting Grounded Visual Question Answering Models to Low Resource Languages
[pdf]
[bibtex]@InProceedings{Wang_2023_CVPR,
  author    = {Wang, Ying and Pfeiffer, Jonas and Carion, Nicolas and LeCun, Yann and Kamath, Aishwarya},
  title     = {Adapting Grounded Visual Question Answering Models to Low Resource Languages},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2596--2605}
}
The MONET Dataset: Multimodal Drone Thermal Dataset Recorded in Rural Scenarios
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Riz_2023_CVPR,
  author    = {Riz, Luigi and Caraffa, Andrea and Bortolon, Matteo and Mekhalfi, Mohamed Lamine and Boscaini, Davide and Moura, Andr{\'e} and Antunes, Jos{\'e} and Dias, Andr{\'e} and Silva, Hugo and Leonidou, Andreas and Constantinides, Christos and Keleshis, Christos and Abate, Dante and Poiesi, Fabio},
  title     = {The {MONET} Dataset: Multimodal Drone Thermal Dataset Recorded in Rural Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2546--2554}
}
SSGVS: Semantic Scene Graph-to-Video Synthesis
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2023_CVPR,
  author    = {Cong, Yuren and Yi, Jinhui and Rosenhahn, Bodo and Yang, Michael Ying},
  title     = {{SSGVS}: Semantic Scene Graph-to-Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2555--2565}
}
TFRGAN: Leveraging Text Information for Blind Face Restoration With Extreme Degradation
[pdf]
[bibtex]@InProceedings{Xie_2023_CVPR,
  author    = {Xie, Chengxing and Ning, Qian and Dong, Weisheng and Shi, Guangming},
  title     = {{TFRGAN}: Leveraging Text Information for Blind Face Restoration With Extreme Degradation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2535--2545}
}
Dynamic Multimodal Fusion
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2023_CVPR,
  author    = {Xue, Zihui and Marculescu, Radu},
  title     = {Dynamic Multimodal Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2575--2584}
}
SEM-POS: Grammatically and Semantically Correct Video Captioning
[pdf]
[bibtex]@InProceedings{Nadeem_2023_CVPR,
  author    = {Nadeem, Asmar and Hilton, Adrian and Dawes, Robert and Thomas, Graham and Mustafa, Armin},
  title     = {{SEM-POS}: Grammatically and Semantically Correct Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2606--2616}
}
Robust Multiview Multimodal Driver Monitoring System Using Masked Multi-Head Self-Attention
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2023_CVPR,
  author    = {Ma, Yiming and Sanchez, Victor and Nikan, Soodeh and Upadhyay, Devesh and Atote, Bhushan and Guha, Tanaya},
  title     = {Robust Multiview Multimodal Driver Monitoring System Using Masked Multi-Head Self-Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2023},
  pages     = {2617--2625}
}