% Conference paper in the ECCV 2016 proceedings (Springer, Lecture Notes in
% Computer Science). The citation key is the exporter-generated identifier and
% is kept unchanged so that existing \cite commands continue to resolve.
% Exporter metadata (copyright line, conference name and dates) is stored in
% `copyright` and `annote` rather than `note`, so that the standard BibTeX
% styles do not print it in the rendered reference.
% NOTE(review): the LNCS volume number is not recorded in this entry; add a
% `volume` field once it has been confirmed against the publisher's page.
@inproceedings{3406fd15d80b4a92b4b00cdd1f56fbd4,
  author    = {Wang, Mingzhe and Azab, Mahmoud and Kojima, Noriyuki and Mihalcea, Rada and Deng, Jia},
  title     = {Structured Matching for Phrase Localization},
  booktitle = {Computer Vision -- 14th European Conference, {ECCV} 2016, Proceedings},
  editor    = {Leibe, Bastian and Matas, Jiri and Sebe, Nicu and Welling, Max},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  year      = {2016},
  pages     = {696--711},
  doi       = {10.1007/978-3-319-46484-8_42},
  isbn      = {978-3-319-46483-1},
  language  = {English (US)},
  keywords  = {Language, Vision},
  abstract  = {In this paper we introduce a new approach to phrase localization: grounding phrases in sentences to image regions. We propose a structured matching of phrases and regions that encourages the semantic relations between phrases to agree with the visual relations between regions. We formulate structured matching as a discrete optimization problem and relax it to a linear program. We use neural networks to embed regions and phrases into vectors, which then define the similarities (matching weights) between regions and phrases. We integrate structured matching with neural networks to enable end-to-end training. Experiments on Flickr30K Entities demonstrate the empirical effectiveness of our approach.},
  copyright = {{\textcopyright} Springer International Publishing AG 2016},
  annote    = {14th European Conference on Computer Vision, ECCV 2016; conference date: 2016-10-08 through 2016-10-16},
}