@inproceedings{chen-etal-2017-line,
title = "On-line Dialogue Policy Learning with Companion Teaching",
author = "Chen, Lu and
Yang, Runzhe and
Chang, Cheng and
Ye, Zihao and
Zhou, Xiang and
Yu, Kai",
editor = "Lapata, Mirella and
Blunsom, Phil and
Koller, Alexander",
booktitle = "Proceedings of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics: Volume 2, Short Papers",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/E17-2032",
pages = "198--204",
abstract = "On-line dialogue policy learning is the key for building evolvable conversational agent in real world scenarios. Poor initial policy can easily lead to bad user experience and consequently fail to attract sufficient users for policy training. A novel framework, companion teaching, is proposed to include a human teacher in the dialogue policy training loop to address the cold start problem. Here, dialogue policy is trained using not only user{'}s reward, but also teacher{'}s example action as well as estimated immediate reward at turn level. Simulation experiments showed that, with small number of human teaching dialogues, the proposed approach can effectively improve user experience at the beginning and smoothly lead to good performance with more user interaction data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2017-line">
<titleInfo>
<title>On-line Dialogue Policy Learning with Companion Teaching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Runzhe</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zihao</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Koller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>On-line dialogue policy learning is the key for building evolvable conversational agent in real world scenarios. Poor initial policy can easily lead to bad user experience and consequently fail to attract sufficient users for policy training. A novel framework, companion teaching, is proposed to include a human teacher in the dialogue policy training loop to address the cold start problem. Here, dialogue policy is trained using not only user’s reward, but also teacher’s example action as well as estimated immediate reward at turn level. Simulation experiments showed that, with small number of human teaching dialogues, the proposed approach can effectively improve user experience at the beginning and smoothly lead to good performance with more user interaction data.</abstract>
<identifier type="citekey">chen-etal-2017-line</identifier>
<location>
<url>https://aclanthology.org/E17-2032</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>198</start>
<end>204</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On-line Dialogue Policy Learning with Companion Teaching
%A Chen, Lu
%A Yang, Runzhe
%A Chang, Cheng
%A Ye, Zihao
%A Zhou, Xiang
%A Yu, Kai
%Y Lapata, Mirella
%Y Blunsom, Phil
%Y Koller, Alexander
%S Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F chen-etal-2017-line
%X On-line dialogue policy learning is the key for building evolvable conversational agent in real world scenarios. Poor initial policy can easily lead to bad user experience and consequently fail to attract sufficient users for policy training. A novel framework, companion teaching, is proposed to include a human teacher in the dialogue policy training loop to address the cold start problem. Here, dialogue policy is trained using not only user’s reward, but also teacher’s example action as well as estimated immediate reward at turn level. Simulation experiments showed that, with small number of human teaching dialogues, the proposed approach can effectively improve user experience at the beginning and smoothly lead to good performance with more user interaction data.
%U https://aclanthology.org/E17-2032
%P 198-204
Markdown (Informal)
[On-line Dialogue Policy Learning with Companion Teaching](https://aclanthology.org/E17-2032) (Chen et al., EACL 2017)
ACL
- Lu Chen, Runzhe Yang, Cheng Chang, Zihao Ye, Xiang Zhou, and Kai Yu. 2017. On-line Dialogue Policy Learning with Companion Teaching. In Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers, pages 198–204, Valencia, Spain. Association for Computational Linguistics.