@inproceedings{sadler-etal-2019-neural,
title = "Can Neural Image Captioning be Controlled via Forced Attention?",
author = "Sadler, Philipp and
Scheffler, Tatjana and
Schlangen, David",
editor = "van Deemter, Kees and
Lin, Chenghua and
Takamura, Hiroya",
booktitle = "Proceedings of the 12th International Conference on Natural Language Generation",
month = oct # "{--}" # nov,
year = "2019",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-8653",
doi = "10.18653/v1/W19-8653",
pages = "427--431",
abstract = "Learned dynamic weighting of the conditioning signal (attention) has been shown to improve neural language generation in a variety of settings. The weights applied when generating a particular output sequence have also been viewed as providing a potentially explanatory insight in the internal workings of the generator. In this paper, we reverse the direction of this connection and ask whether through the control of the attention of the model we can control its output. Specifically, we take a standard neural image captioning model that uses attention, and fix the attention to predetermined areas in the image. We evaluate whether the resulting output is more likely to mention the class of the object in that area than the normally generated caption. We introduce three effective methods to control the attention and find that these are producing expected results in up to 27.43{\%} of the cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sadler-etal-2019-neural">
    <titleInfo>
      <title>Can Neural Image Captioning be Controlled via Forced Attention?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Sadler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tatjana</namePart>
      <namePart type="family">Scheffler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Schlangen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-oct–nov</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 12th International Conference on Natural Language Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kees</namePart>
        <namePart type="family">van Deemter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chenghua</namePart>
        <namePart type="family">Lin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hiroya</namePart>
        <namePart type="family">Takamura</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Learned dynamic weighting of the conditioning signal (attention) has been shown to improve neural language generation in a variety of settings. The weights applied when generating a particular output sequence have also been viewed as providing a potentially explanatory insight into the internal workings of the generator. In this paper, we reverse the direction of this connection and ask whether, by controlling the model's attention, we can control its output. Specifically, we take a standard neural image captioning model that uses attention, and fix the attention to predetermined areas in the image. We evaluate whether the resulting output is more likely to mention the class of the object in that area than the normally generated caption. We introduce three effective methods to control the attention and find that these produce the expected results in up to 27.43% of the cases.</abstract>
<identifier type="citekey">sadler-etal-2019-neural</identifier>
<identifier type="doi">10.18653/v1/W19-8653</identifier>
<location>
<url>https://aclanthology.org/W19-8653</url>
</location>
<part>
<date>2019-oct–nov</date>
<extent unit="page">
<start>427</start>
<end>431</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Neural Image Captioning be Controlled via Forced Attention?
%A Sadler, Philipp
%A Scheffler, Tatjana
%A Schlangen, David
%Y van Deemter, Kees
%Y Lin, Chenghua
%Y Takamura, Hiroya
%S Proceedings of the 12th International Conference on Natural Language Generation
%D 2019
%8 oct–nov
%I Association for Computational Linguistics
%C Tokyo, Japan
%F sadler-etal-2019-neural
%X Learned dynamic weighting of the conditioning signal (attention) has been shown to improve neural language generation in a variety of settings. The weights applied when generating a particular output sequence have also been viewed as providing a potentially explanatory insight into the internal workings of the generator. In this paper, we reverse the direction of this connection and ask whether, by controlling the model's attention, we can control its output. Specifically, we take a standard neural image captioning model that uses attention, and fix the attention to predetermined areas in the image. We evaluate whether the resulting output is more likely to mention the class of the object in that area than the normally generated caption. We introduce three effective methods to control the attention and find that these produce the expected results in up to 27.43% of the cases.
%R 10.18653/v1/W19-8653
%U https://aclanthology.org/W19-8653
%U https://doi.org/10.18653/v1/W19-8653
%P 427-431
Markdown (Informal)
[Can Neural Image Captioning be Controlled via Forced Attention?](https://aclanthology.org/W19-8653) (Sadler et al., INLG 2019)
ACL
Philipp Sadler, Tatjana Scheffler, and David Schlangen. 2019. Can Neural Image Captioning be Controlled via Forced Attention?. In Proceedings of the 12th International Conference on Natural Language Generation, pages 427–431, Tokyo, Japan. Association for Computational Linguistics.
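
For readers skimming this entry: the abstract's core idea, fixing a captioning model's attention to predetermined image regions, is easy to illustrate. The sketch below is not the authors' code; it is a minimal, hypothetical PyTorch example (names such as `decode_step`, `W_att`, and `forced_mask` are invented here) showing one standard way attention could be forced: mask the attention logits so the softmax places all of its weight inside the chosen area.

```python
# Minimal, hypothetical sketch of forced attention in a
# show-attend-tell style captioning decoder (not the paper's code).
import torch
import torch.nn.functional as F

def decode_step(features, h, W_att, forced_mask=None):
    """One decoding step with optional forced attention.

    features: (regions, feat_dim) image feature grid, e.g. 7x7 = 49 regions
    h: (hidden_dim,) current decoder hidden state
    forced_mask: optional (regions,) bool tensor; if given, attention is
        confined to the True regions instead of being freely learned.
    """
    scores = features @ (W_att @ h)            # (regions,) attention logits
    if forced_mask is not None:
        # Forcing: remove all attention mass outside the target area.
        scores = scores.masked_fill(~forced_mask, float("-inf"))
    alpha = F.softmax(scores, dim=0)           # attention weights, sum to 1
    context = alpha @ features                 # (feat_dim,) context vector
    return context, alpha

# Toy usage: 49 regions of 512-d features; force attention onto regions 10-13.
features = torch.randn(49, 512)
h = torch.randn(256)
W_att = torch.randn(512, 256)
mask = torch.zeros(49, dtype=torch.bool)
mask[10:14] = True
context, alpha = decode_step(features, h, W_att, forced_mask=mask)
assert float(alpha[~mask].sum()) == 0.0       # all mass inside the forced area
```

In the paper's terms, a caption decoded step by step under such a mask can then be checked for whether it mentions the class of the object located in the forced area.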