BibTeX
@inproceedings{kojima-etal-2020-learned,
    title = "What is Learned in Visually Grounded Neural Syntax Acquisition",
    author = "Kojima, Noriyuki and
      Averbuch-Elor, Hadar and
      Rush, Alexander and
      Artzi, Yoav",
    editor = "Jurafsky, Dan and
      Chai, Joyce and
      Schluter, Natalie and
      Tetreault, Joel",
    booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
    month = jul,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.acl-main.234",
    doi = "10.18653/v1/2020.acl-main.234",
    pages = "2615--2635",
    abstract = "Visual features are a promising signal for learning bootstrap textual models. However, blackbox learning models make it difficult to isolate the specific contribution of visual components. In this analysis, we consider the case study of the Visually Grounded Neural Syntax Learner (Shi et al., 2019), a recent approach for learning syntax from a visual training signal. By constructing simplified versions of the model, we isolate the core factors that yield the model{'}s strong performance. Contrary to what the model might be capable of learning, we find significantly less expressive versions produce similar predictions and perform just as well, or even better. We also find that a simple lexical signal of noun concreteness plays the main role in the model{'}s predictions as opposed to more complex syntactic reasoning.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kojima-etal-2020-learned">
    <titleInfo>
      <title>What is Learned in Visually Grounded Neural Syntax Acquisition</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Noriyuki</namePart>
      <namePart type="family">Kojima</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hadar</namePart>
      <namePart type="family">Averbuch-Elor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Rush</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yoav</namePart>
      <namePart type="family">Artzi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dan</namePart>
        <namePart type="family">Jurafsky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Chai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Natalie</namePart>
        <namePart type="family">Schluter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joel</namePart>
        <namePart type="family">Tetreault</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Visual features are a promising signal for learning bootstrap textual models. However, blackbox learning models make it difficult to isolate the specific contribution of visual components. In this analysis, we consider the case study of the Visually Grounded Neural Syntax Learner (Shi et al., 2019), a recent approach for learning syntax from a visual training signal. By constructing simplified versions of the model, we isolate the core factors that yield the model’s strong performance. Contrary to what the model might be capable of learning, we find significantly less expressive versions produce similar predictions and perform just as well, or even better. We also find that a simple lexical signal of noun concreteness plays the main role in the model’s predictions as opposed to more complex syntactic reasoning.</abstract>
    <identifier type="citekey">kojima-etal-2020-learned</identifier>
    <identifier type="doi">10.18653/v1/2020.acl-main.234</identifier>
    <location>
      <url>https://aclanthology.org/2020.acl-main.234</url>
    </location>
    <part>
      <date>2020-07</date>
      <extent unit="page">
        <start>2615</start>
        <end>2635</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T What is Learned in Visually Grounded Neural Syntax Acquisition
%A Kojima, Noriyuki
%A Averbuch-Elor, Hadar
%A Rush, Alexander
%A Artzi, Yoav
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F kojima-etal-2020-learned
%X Visual features are a promising signal for learning bootstrap textual models. However, blackbox learning models make it difficult to isolate the specific contribution of visual components. In this analysis, we consider the case study of the Visually Grounded Neural Syntax Learner (Shi et al., 2019), a recent approach for learning syntax from a visual training signal. By constructing simplified versions of the model, we isolate the core factors that yield the model’s strong performance. Contrary to what the model might be capable of learning, we find significantly less expressive versions produce similar predictions and perform just as well, or even better. We also find that a simple lexical signal of noun concreteness plays the main role in the model’s predictions as opposed to more complex syntactic reasoning.
%R 10.18653/v1/2020.acl-main.234
%U https://aclanthology.org/2020.acl-main.234
%U https://doi.org/10.18653/v1/2020.acl-main.234
%P 2615-2635
Markdown (Informal)
[What is Learned in Visually Grounded Neural Syntax Acquisition](https://aclanthology.org/2020.acl-main.234) (Kojima et al., ACL 2020)
ACL
Noriyuki Kojima, Hadar Averbuch-Elor, Alexander Rush, and Yoav Artzi. 2020. What is Learned in Visually Grounded Neural Syntax Acquisition. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 2615–2635, Online. Association for Computational Linguistics.