Sight and Sound


Learning to Separate Object Sounds by Watching Unlabeled Video
Ruohan Gao,
Rogerio S. Feris,
Kristen Grauman
[pdf] [arXiv]
[bibtex]
% Gao, Feris, Grauman (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Gao_2018_CVPR_Workshops,
  author    = {Gao, Ruohan and Feris, Rogerio S. and Grauman, Kristen},
  title     = {Learning to Separate Object Sounds by Watching Unlabeled Video},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Visual to Sound: Generating Natural Sound for Videos in the Wild
Yipin Zhou,
Zhaowen Wang,
Chen Fang,
Trung Bui,
Tamara L. Berg
[pdf] [arXiv]
[bibtex]
% Zhou et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Zhou_2018_CVPR_Workshops,
  author    = {Zhou, Yipin and Wang, Zhaowen and Fang, Chen and Bui, Trung and Berg, Tamara L.},
  title     = {Visual to Sound: Generating Natural Sound for Videos in the Wild},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Fast Forwarding Egocentric Videos by Listening and Watching
Vinicius S. Furlan,
Ruzena Bajcsy,
Erickson R. Nascimento
[pdf] [arXiv]
[bibtex]
% First author is one person: family name Furlan, given names Vinicius S.
% The citation key is kept as-is so existing citations of this entry still resolve.
% month uses the built-in jun macro and must stay unbraced.
@InProceedings{S_2018_CVPR_Workshops,
  author    = {Furlan, Vinicius S. and Bajcsy, Ruzena and Nascimento, Erickson R.},
  title     = {Fast Forwarding Egocentric Videos by Listening and Watching},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

On Learning Association of Sound Source and Visual Scenes
Arda Senocak,
Tae-Hyun Oh,
Junsik Kim,
Ming-Hsuan Yang,
In So Kweon
[pdf]
[bibtex]
% Last author: family name Kweon, given names In So (written "Last, First" to avoid a wrong split).
% month uses the built-in jun macro and must stay unbraced.
@InProceedings{Senocak_2018_CVPR_Workshops,
  author    = {Senocak, Arda and Oh, Tae-Hyun and Kim, Junsik and Yang, Ming-Hsuan and Kweon, In So},
  title     = {On Learning Association of Sound Source and Visual Scenes},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Image Generation Associated With Music Data
Yue Qiu,
Hirokatsu Kataoka
[pdf]
[bibtex]
% Qiu and Kataoka (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Qiu_2018_CVPR_Workshops,
  author    = {Qiu, Yue and Kataoka, Hirokatsu},
  title     = {Image Generation Associated With Music Data},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Semantic Speech Retrieval With a Visually Grounded Model of Untranscribed Speech
Herman Kamper,
Gregory Shakhnarovich,
Karen Livescu
[pdf] [arXiv]
[bibtex]
% Kamper, Shakhnarovich, Livescu (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Kamper_2018_CVPR_Workshops,
  author    = {Kamper, Herman and Shakhnarovich, Gregory and Livescu, Karen},
  title     = {Semantic Speech Retrieval With a Visually Grounded Model of Untranscribed Speech},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Weakly Supervised Representation Learning for Unsynchronized Audio-Visual Events
Sanjeel Parekh,
Slim Essid,
Alexey Ozerov,
Ngoc Q. K. Duong,
Patrick Perez,
Gael Richard
[pdf] [arXiv]
[bibtex]
% Parekh et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Parekh_2018_CVPR_Workshops,
  author    = {Parekh, Sanjeel and Essid, Slim and Ozerov, Alexey and Duong, Ngoc Q. K. and Perez, Patrick and Richard, Gael},
  title     = {Weakly Supervised Representation Learning for Unsynchronized Audio-Visual Events},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

The Excitement of Sports: Automatic Highlights Using Audio/Visual Cues
Michele Merler,
Dhiraj Joshi,
Khoi-Nguyen C. Mac,
Quoc-Bao Nguyen,
Stephen Hammer,
John Kent,
Jinjun Xiong,
Minh N. Do,
John R. Smith,
Rogerio S. Feris
[pdf]
[bibtex]
% Merler et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Merler_2018_CVPR_Workshops,
  author    = {Merler, Michele and Joshi, Dhiraj and Mac, Khoi-Nguyen C. and Nguyen, Quoc-Bao and Hammer, Stephen and Kent, John and Xiong, Jinjun and Do, Minh N. and Smith, John R. and Feris, Rogerio S.},
  title     = {The Excitement of Sports: Automatic Highlights Using Audio/Visual Cues},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

A Multimodal Approach to Mapping Soundscapes
Tawfiq Salem,
Menghua Zhai,
Scott Workman,
Nathan Jacobs
[pdf]
[bibtex]
% Salem et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Salem_2018_CVPR_Workshops,
  author    = {Salem, Tawfiq and Zhai, Menghua and Workman, Scott and Jacobs, Nathan},
  title     = {A Multimodal Approach to Mapping Soundscapes},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Multimodal Attention for Fusion of Audio and Spatiotemporal Features for Video Description
Chiori Hori,
Takaaki Hori,
Gordon Wichern,
Jue Wang,
Teng-Yok Lee,
Anoop Cherian,
Tim K. Marks
[pdf]
[bibtex]
% Hori et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Hori_2018_CVPR_Workshops,
  author    = {Hori, Chiori and Hori, Takaaki and Wichern, Gordon and Wang, Jue and Lee, Teng-Yok and Cherian, Anoop and Marks, Tim K.},
  title     = {Multimodal Attention for Fusion of Audio and Spatiotemporal Features for Video Description},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Visual Rhythm and Beat
Abe Davis,
Maneesh Agrawala
[pdf]
[bibtex]
% Davis and Agrawala (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Davis_2018_CVPR_Workshops,
  author    = {Davis, Abe and Agrawala, Maneesh},
  title     = {Visual Rhythm and Beat},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}

Inverting Audio-Visual Simulation for Shape and Material Perception
Zhoutong Zhang,
Jiajun Wu,
Qiujia Li,
Zhengjia Huang,
Joshua B. Tenenbaum,
William T. Freeman
[pdf]
[bibtex]
% Zhang et al. (CVPR 2018 Workshops). month uses the built-in jun macro and must stay unbraced.
@InProceedings{Zhang_2018_CVPR_Workshops,
  author    = {Zhang, Zhoutong and Wu, Jiajun and Li, Qiujia and Huang, Zhengjia and Tenenbaum, Joshua B. and Freeman, William T.},
  title     = {Inverting Audio-Visual Simulation for Shape and Material Perception},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2018},
}