Sight and Sound


Sound to Visual: Hierarchical Cross-Modal Talking Face Generation
Lele Chen,
Haitian Zheng,
Ross Maddox,
Zhiyao Duan,
Chenliang Xu
[pdf]
[bibtex]
@InProceedings{Chen_2019_CVPR_Workshops,
  author    = {Chen, Lele and Zheng, Haitian and Maddox, Ross and Duan, Zhiyao and Xu, Chenliang},
  title     = {Sound to Visual: Hierarchical Cross-Modal Talking Face Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

Audio-Visual Event Localization in the Wild
Yapeng Tian,
Jing Shi,
Bochen Li,
Zhiyao Duan,
Chenliang Xu
[pdf]
[bibtex]
@InProceedings{Tian_2019_CVPR_Workshops,
  author    = {Tian, Yapeng and Shi, Jing and Li, Bochen and Duan, Zhiyao and Xu, Chenliang},
  title     = {Audio-Visual Event Localization in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

Audio-Visual Interpretable and Controllable Video Captioning
Yapeng Tian,
Chenxiao Guan,
Justin Goodman,
Marc Moore,
Chenliang Xu
[pdf]
[bibtex]
% Key carries a "_b" suffix because the unsuffixed key is already used by the
% "Audio-Visual Event Localization in the Wild" entry; BibTeX drops repeated keys.
% Third author is entered as family name "Goodman", given name "Justin".
@InProceedings{Tian_2019_CVPR_Workshops_b,
  author    = {Tian, Yapeng and Guan, Chenxiao and Goodman, Justin and Moore, Marc and Xu, Chenliang},
  title     = {Audio-Visual Interpretable and Controllable Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

Reflection and Diffraction-Aware Sound Source Localization
Inkyu An,
Jung-Woo Choi,
Dinesh Manocha,
Sung-Eui Yoon
[pdf]
[bibtex]
@InProceedings{An_2019_CVPR_Workshops,
  author    = {An, Inkyu and Choi, Jung-Woo and Manocha, Dinesh and Yoon, Sung-Eui},
  title     = {Reflection and Diffraction-Aware Sound Source Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

Generating Video from Single Image and Sound
Yukitaka Tsuchiya,
Takahiro Itazuri,
Ryota Natsume,
Shintaro Yamamoto,
Takuya Kato,
Shigeo Morishima
[pdf]
[bibtex]
@InProceedings{Tsuchiya_2019_CVPR_Workshops,
  author    = {Tsuchiya, Yukitaka and Itazuri, Takahiro and Natsume, Ryota and Yamamoto, Shintaro and Kato, Takuya and Morishima, Shigeo},
  title     = {Generating Video from Single Image and Sound},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

WAV2PIX: Speech-conditioned Face Generation using Generative Adversarial Networks
Amanda Cardoso Duarte,
Francisco Roldan,
Miquel Tubau,
Janna Escur,
Santiago Pascual,
Amaia Salvador,
Eva Mohedano,
Kevin McGuinness,
Jordi Torres,
Xavier Giro-i-Nieto
[pdf]
[bibtex]
% First author is entered with family name "Duarte" (matching the citation key)
% and given names "Amanda Cardoso".
@InProceedings{Duarte_2019_CVPR_Workshops,
  author    = {Duarte, Amanda Cardoso and Roldan, Francisco and Tubau, Miquel and Escur, Janna and Pascual, Santiago and Salvador, Amaia and Mohedano, Eva and McGuinness, Kevin and Torres, Jordi and Giro-i-Nieto, Xavier},
  title     = {{WAV2PIX}: Speech-conditioned Face Generation using Generative Adversarial Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

On Attention Modules for Audio-Visual Synchronization
Naji Khosravan,
Shervin Ardeshir,
Rohit Puri
[pdf]
[bibtex]
@InProceedings{Khosravan_2019_CVPR_Workshops,
  author    = {Khosravan, Naji and Ardeshir, Shervin and Puri, Rohit},
  title     = {On Attention Modules for Audio-Visual Synchronization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

Grounding Spoken Words in Unlabeled Video
Angie W Boggust,
Kartik Audhkhasi,
Dhiraj Joshi,
David Harwath,
Samuel Thomas,
Rogerio Feris,
Danny Gutfreund,
Yang Zhang,
Antonio Torralba,
Michael Picheny,
James Glass
[pdf]
[bibtex]
% First author is entered with family name "Boggust" (matching the citation key);
% the middle initial belongs to the given-name part.
@InProceedings{Boggust_2019_CVPR_Workshops,
  author    = {Boggust, Angie W. and Audhkhasi, Kartik and Joshi, Dhiraj and Harwath, David and Thomas, Samuel and Feris, Rogerio and Gutfreund, Danny and Zhang, Yang and Torralba, Antonio and Picheny, Michael and Glass, James},
  title     = {Grounding Spoken Words in Unlabeled Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

A Neurorobotic Experiment for Crossmodal Conflict Resolution
German Parisi,
Pablo Barros,
Di Fu,
Sven Magg,
Haiyan Wu,
Xun Liu,
Stefan Wermter
[pdf]
[bibtex]
@InProceedings{Parisi_2019_CVPR_Workshops,
  author    = {Parisi, German and Barros, Pablo and Fu, Di and Magg, Sven and Wu, Haiyan and Liu, Xun and Wermter, Stefan},
  title     = {A Neurorobotic Experiment for Crossmodal Conflict Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}

End-to-End Speech-Driven Realistic Facial Animation with Temporal GANs
Konstantinos Vougioukas,
Stavros Petridis,
Maja Pantic
[pdf]
[bibtex]
@InProceedings{Vougioukas_2019_CVPR_Workshops,
  author    = {Vougioukas, Konstantinos and Petridis, Stavros and Pantic, Maja},
  title     = {End-to-End Speech-Driven Realistic Facial Animation with Temporal {GANs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2019},
}