@inproceedings{hancock-etal-2019-learning,
title = "Learning from Dialogue after Deployment: Feed Yourself, Chatbot!",
author = "Hancock, Braden and
Bordes, Antoine and
Mazare, Pierre-Emmanuel and
Weston, Jason",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1358",
doi = "10.18653/v1/P19-1358",
pages = "3667--3684",
abstract = "The majority of conversations a dialogue agent sees over its lifetime occur after it has already been trained and deployed, leaving a vast store of potential training signal untapped. In this work, we propose the self-feeding chatbot, a dialogue agent with the ability to extract new training examples from the conversations it participates in. As our agent engages in conversation, it also estimates user satisfaction in its responses. When the conversation appears to be going well, the user{'}s responses become new training examples to imitate. When the agent believes it has made a mistake, it asks for feedback; learning to predict the feedback that will be given improves the chatbot{'}s dialogue abilities further. On the PersonaChat chit-chat dataset with over 131k training examples, we find that learning from dialogue with a self-feeding chatbot significantly improves performance, regardless of the amount of traditional supervision.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hancock-etal-2019-learning">
<titleInfo>
<title>Learning from Dialogue after Deployment: Feed Yourself, Chatbot!</title>
</titleInfo>
<name type="personal">
<namePart type="given">Braden</namePart>
<namePart type="family">Hancock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bordes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre-Emmanuel</namePart>
<namePart type="family">Mazare</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Weston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The majority of conversations a dialogue agent sees over its lifetime occur after it has already been trained and deployed, leaving a vast store of potential training signal untapped. In this work, we propose the self-feeding chatbot, a dialogue agent with the ability to extract new training examples from the conversations it participates in. As our agent engages in conversation, it also estimates user satisfaction in its responses. When the conversation appears to be going well, the user’s responses become new training examples to imitate. When the agent believes it has made a mistake, it asks for feedback; learning to predict the feedback that will be given improves the chatbot’s dialogue abilities further. On the PersonaChat chit-chat dataset with over 131k training examples, we find that learning from dialogue with a self-feeding chatbot significantly improves performance, regardless of the amount of traditional supervision.</abstract>
<identifier type="citekey">hancock-etal-2019-learning</identifier>
<identifier type="doi">10.18653/v1/P19-1358</identifier>
<location>
<url>https://aclanthology.org/P19-1358</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>3667</start>
<end>3684</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning from Dialogue after Deployment: Feed Yourself, Chatbot!
%A Hancock, Braden
%A Bordes, Antoine
%A Mazare, Pierre-Emmanuel
%A Weston, Jason
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F hancock-etal-2019-learning
%X The majority of conversations a dialogue agent sees over its lifetime occur after it has already been trained and deployed, leaving a vast store of potential training signal untapped. In this work, we propose the self-feeding chatbot, a dialogue agent with the ability to extract new training examples from the conversations it participates in. As our agent engages in conversation, it also estimates user satisfaction in its responses. When the conversation appears to be going well, the user’s responses become new training examples to imitate. When the agent believes it has made a mistake, it asks for feedback; learning to predict the feedback that will be given improves the chatbot’s dialogue abilities further. On the PersonaChat chit-chat dataset with over 131k training examples, we find that learning from dialogue with a self-feeding chatbot significantly improves performance, regardless of the amount of traditional supervision.
%R 10.18653/v1/P19-1358
%U https://aclanthology.org/P19-1358
%U https://doi.org/10.18653/v1/P19-1358
%P 3667-3684
Markdown (Informal)
[Learning from Dialogue after Deployment: Feed Yourself, Chatbot!](https://aclanthology.org/P19-1358) (Hancock et al., ACL 2019)
ACL
Braden Hancock, Antoine Bordes, Pierre-Emmanuel Mazare, and Jason Weston. 2019. Learning from Dialogue after Deployment: Feed Yourself, Chatbot! In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 3667–3684, Florence, Italy. Association for Computational Linguistics.
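
For readers who want a concrete picture of the self-feeding loop summarized in the abstract, here is a minimal, hypothetical Python sketch. All names (SelfFeedingChatbot, estimate_satisfaction, respond, satisfaction_threshold) are illustrative placeholders, not the authors' API, and the dialogue model and satisfaction classifier are stubbed out; it only shows how the two pools of harvested examples are collected.

```python
from dataclasses import dataclass, field
from typing import List, Tuple


@dataclass
class SelfFeedingChatbot:
    """Hypothetical sketch of the self-feeding loop described in the abstract."""

    dialogue_examples: List[Tuple[str, str]] = field(default_factory=list)  # (context, response to imitate)
    feedback_examples: List[Tuple[str, str]] = field(default_factory=list)  # (context, human feedback)
    satisfaction_threshold: float = 0.5  # illustrative cutoff, not the paper's value
    awaiting_feedback: bool = False
    _pending_context: str = ""

    def respond(self, context: str) -> str:
        # Stand-in for the underlying dialogue (retrieval/generation) model.
        return "Nice! Tell me more about that."

    def estimate_satisfaction(self, user_utterance: str) -> float:
        # Stand-in for the satisfaction classifier trained on a small labeled set.
        return 0.1 if "what?" in user_utterance.lower() else 0.9

    def step(self, history: List[str], user_utterance: str) -> str:
        context = " ".join(history + [user_utterance])
        if self.awaiting_feedback:
            # The user's reply to our feedback request becomes a feedback example
            # for the auxiliary feedback-prediction task.
            self.feedback_examples.append((self._pending_context, user_utterance))
            self.awaiting_feedback = False
            return self.respond(context)
        if self.estimate_satisfaction(user_utterance) >= self.satisfaction_threshold:
            # Conversation appears to be going well: harvest the user's reply
            # as a new dialogue example to imitate.
            self.dialogue_examples.append((" ".join(history), user_utterance))
            return self.respond(context)
        # Likely mistake: ask for feedback instead of continuing normally.
        self.awaiting_feedback = True
        self._pending_context = context
        return "Oops, I think I messed up. What should I have said?"


# Usage: each user turn either yields a new training example or a feedback request.
bot = SelfFeedingChatbot()
print(bot.step(["hi, do you like hiking?"], "yes, i hike every weekend in the mountains"))
print(bot.step(["great, what is your favorite trail?"], "what? that makes no sense"))
print(bot.step(["oops, what should i have said?"], "you should have asked about my favorite trail"))
print(len(bot.dialogue_examples), len(bot.feedback_examples))  # 1 1
```

In the paper, the harvested dialogue and feedback examples are mixed back into training alongside the original supervised data; the sketch above covers only the collection step, not retraining.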