Multimodal Learning and Applications
Radar Camera Fusion via Representation Learning in Autonomous Driving
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2021_CVPR,
  author    = {Dong, Xu and Zhuang, Binnan and Mao, Yunxiang and Liu, Langechuan},
  title     = {Radar Camera Fusion via Representation Learning in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1672--1681},
}
An Improved Attention for Visual Question Answering
[pdf]
[arXiv]
[bibtex]@InProceedings{Rahman_2021_CVPR,
  author    = {Rahman, Tanzila and Chou, Shih-Han and Sigal, Leonid and Carenini, Giuseppe},
  title     = {An Improved Attention for Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1653--1662},
}
Private-Shared Disentangled Multimodal VAE for Learning of Latent Representations
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2021_CVPR,
  author    = {Lee, Mihee and Pavlovic, Vladimir},
  title     = {Private-Shared Disentangled Multimodal {VAE} for Learning of Latent Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1692--1700},
}
Dealing With Missing Modalities in the Visual Question Answer-Difference Prediction Task Through Knowledge Distillation
[pdf]
[arXiv]
[bibtex]@InProceedings{Cho_2021_CVPR,
  author    = {Cho, Jae Won and Kim, Dong-Jin and Choi, Jinsoo and Jung, Yunjae and Kweon, In So},
  title     = {Dealing With Missing Modalities in the Visual Question Answer-Difference Prediction Task Through Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1592--1601},
}
Self-Supervised Feature Learning by Cross-Modality and Cross-View Correspondences
[pdf]
[arXiv]
[bibtex]@InProceedings{Jing_2021_CVPR,
  author    = {Jing, Longlong and Zhang, Ling and Tian, Yingli},
  title     = {Self-Supervised Feature Learning by Cross-Modality and Cross-View Correspondences},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1581--1591},
}
Target-Tailored Source-Transformation for Scene Graph Generation
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2021_CVPR,
  author    = {Liao, Wentong and Lan, Cuiling and Yang, Michael Ying and Zeng, Wenjun and Rosenhahn, Bodo},
  title     = {Target-Tailored Source-Transformation for Scene Graph Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1663--1671},
}
Beyond VQA: Generating Multi-Word Answers and Rationales to Visual Questions
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dua_2021_CVPR,
  author    = {Dua, Radhika and Kancheti, Sai Srinivas and Balasubramanian, Vineeth N},
  title     = {Beyond {VQA}: Generating Multi-Word Answers and Rationales to Visual Questions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1623--1632},
}
Adaptive Intermediate Representations for Video Understanding
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kangaspunta_2021_CVPR,
  author    = {Kangaspunta, Juhana and Piergiovanni, AJ and Jonschkowski, Rico and Ryoo, Michael and Angelova, Anelia},
  title     = {Adaptive Intermediate Representations for Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1602--1612},
}
Exploring the Limits of Zero-Shot Learning - How Low Can You Go?
[pdf]
[bibtex]@InProceedings{Dandu_2021_CVPR,
  author    = {Dandu, Hemanth and Sharma, Karan and Bhandarkar, Suchendra M.},
  title     = {Exploring the Limits of Zero-Shot Learning - How Low Can You Go?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1710--1719},
}
Progressive Knowledge-Embedded Unified Perceptual Parsing for Scene Understanding
[pdf]
[bibtex]@InProceedings{Zheng_2021_CVPR,
  author    = {Zheng, Wenbo and Yan, Lan and Wang, Fei-Yue and Gou, Chao},
  title     = {Progressive Knowledge-Embedded Unified Perceptual Parsing for Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1633--1642},
}
APES: Audiovisual Person Search in Untrimmed Video
[pdf]
[arXiv]
[bibtex]@InProceedings{Alcazar_2021_CVPR,
  author    = {Alcazar, Juan Leon and Caba, Fabian and Mai, Long and Perazzi, Federico and Lee, Joon-Young and Arbelaez, Pablo and Ghanem, Bernard},
  title     = {{APES}: Audiovisual Person Search in Untrimmed Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1720--1729},
}
Practical Cross-Modal Manifold Alignment for Robotic Grounded Language Learning
[pdf]
[bibtex]@InProceedings{Nguyen_2021_CVPR,
  author    = {Nguyen, Andre T. and Richards, Luke E. and Kebe, Gaoussou Youssouf and Raff, Edward and Darvish, Kasra and Ferraro, Frank and Matuszek, Cynthia},
  title     = {Practical Cross-Modal Manifold Alignment for Robotic Grounded Language Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1613--1622},
}
Cross-Modal Speaker Verification and Recognition: A Multilingual Perspective
[pdf]
[arXiv]
[bibtex]@InProceedings{Nawaz_2021_CVPR,
  author    = {Nawaz, Shah and Saeed, Muhammad Saad and Morerio, Pietro and Mahmood, Arif and Gallo, Ignazio and Yousaf, Muhammad Haroon and Del Bue, Alessio},
  title     = {Cross-Modal Speaker Verification and Recognition: A Multilingual Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1682--1691},
}
Using Text To Teach Image Retrieval
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2021_CVPR_b,
  author    = {Dong, Haoyu and Wang, Ze and Qiu, Qiang and Sapiro, Guillermo},
  title     = {Using Text To Teach Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1643--1652},
}
% Key carries a "_b" suffix: the entry for "Radar Camera Fusion via Representation Learning in Autonomous Driving" (Dong, Xu et al.) in this file already uses Dong_2021_CVPR, and BibTeX rejects repeated keys.
Editing Like Humans: A Contextual, Multimodal Framework for Automated Video Editing
[pdf]
[supp]
[bibtex]@InProceedings{Koorathota_2021_CVPR,
  author    = {Koorathota, Sharath and Adelman, Patrick and Cotton, Kelly and Sajda, Paul},
  title     = {Editing Like Humans: A Contextual, Multimodal Framework for Automated Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1701--1709},
}
3D Hand Pose Estimation via Aligned Latent Space Injection and Kinematic Losses
[pdf]
[bibtex]@InProceedings{Stergioulas_2021_CVPR,
  author    = {Stergioulas, Andreas and Chatzis, Theocharis and Konstantinidis, Dimitrios and Dimitropoulos, Kosmas and Daras, Petros},
  title     = {{3D} Hand Pose Estimation via Aligned Latent Space Injection and Kinematic Losses},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2021},
  pages     = {1730--1739},
}