@inproceedings{neubig-etal-2019-compare,
title = "compare-mt: A Tool for Holistic Comparison of Language Generation Systems",
author = "Neubig, Graham and
Dou, Zi-Yi and
Hu, Junjie and
Michel, Paul and
Pruthi, Danish and
Wang, Xinyi",
editor = "Ammar, Waleed and
Louis, Annie and
Mostafazadeh, Nasrin",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics (Demonstrations)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-4007",
doi = "10.18653/v1/N19-4007",
pages = "35--41",
abstract = "In this paper, we describe compare-mt, a tool for holistic analysis and comparison of the results of systems for language generation tasks such as machine translation. The main goal of the tool is to give the user a high-level and coherent view of the salient differences between systems that can then be used to guide further analysis or system improvement. It implements a number of tools to do so, such as analysis of accuracy of generation of particular types of words, bucketed histograms of sentence accuracies or counts based on salient characteristics, and extraction of characteristic n-grams for each system. It also has a number of advanced features such as use of linguistic labels, source side data, or comparison of log likelihoods for probabilistic models, and also aims to be easily extensible by users to new types of analysis. compare-mt is a pure-Python open source package, that has already proven useful to generate analyses that have been used in our published papers. Demo Video: \url{https://youtu.be/NyJEQT7t2CA}",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="neubig-etal-2019-compare">
<titleInfo>
<title>compare-mt: A Tool for Holistic Comparison of Language Generation Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zi-Yi</namePart>
<namePart type="family">Dou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Michel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danish</namePart>
<namePart type="family">Pruthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinyi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Waleed</namePart>
<namePart type="family">Ammar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Louis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nasrin</namePart>
<namePart type="family">Mostafazadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe compare-mt, a tool for holistic analysis and comparison of the results of systems for language generation tasks such as machine translation. The main goal of the tool is to give the user a high-level and coherent view of the salient differences between systems that can then be used to guide further analysis or system improvement. It implements a number of tools to do so, such as analysis of accuracy of generation of particular types of words, bucketed histograms of sentence accuracies or counts based on salient characteristics, and extraction of characteristic n-grams for each system. It also has a number of advanced features such as use of linguistic labels, source side data, or comparison of log likelihoods for probabilistic models, and also aims to be easily extensible by users to new types of analysis. compare-mt is a pure-Python open source package, that has already proven useful to generate analyses that have been used in our published papers. Demo Video: https://youtu.be/NyJEQT7t2CA</abstract>
<identifier type="citekey">neubig-etal-2019-compare</identifier>
<identifier type="doi">10.18653/v1/N19-4007</identifier>
<location>
<url>https://aclanthology.org/N19-4007</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>35</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T compare-mt: A Tool for Holistic Comparison of Language Generation Systems
%A Neubig, Graham
%A Dou, Zi-Yi
%A Hu, Junjie
%A Michel, Paul
%A Pruthi, Danish
%A Wang, Xinyi
%Y Ammar, Waleed
%Y Louis, Annie
%Y Mostafazadeh, Nasrin
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F neubig-etal-2019-compare
%X In this paper, we describe compare-mt, a tool for holistic analysis and comparison of the results of systems for language generation tasks such as machine translation. The main goal of the tool is to give the user a high-level and coherent view of the salient differences between systems that can then be used to guide further analysis or system improvement. It implements a number of tools to do so, such as analysis of accuracy of generation of particular types of words, bucketed histograms of sentence accuracies or counts based on salient characteristics, and extraction of characteristic n-grams for each system. It also has a number of advanced features such as use of linguistic labels, source side data, or comparison of log likelihoods for probabilistic models, and also aims to be easily extensible by users to new types of analysis. compare-mt is a pure-Python open source package, that has already proven useful to generate analyses that have been used in our published papers. Demo Video: https://youtu.be/NyJEQT7t2CA
%R 10.18653/v1/N19-4007
%U https://aclanthology.org/N19-4007
%U https://doi.org/10.18653/v1/N19-4007
%P 35-41
Markdown (Informal)
[compare-mt: A Tool for Holistic Comparison of Language Generation Systems](https://aclanthology.org/N19-4007) (Neubig et al., NAACL 2019)
ACL
- Graham Neubig, Zi-Yi Dou, Junjie Hu, Paul Michel, Danish Pruthi, and Xinyi Wang. 2019. compare-mt: A Tool for Holistic Comparison of Language Generation Systems. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations), pages 35–41, Minneapolis, Minnesota. Association for Computational Linguistics.