% Conference paper, pp. 14--20 of the ACL 2018 Student Research Workshop proceedings (Anthology ID P18-3003).
@inproceedings{sharif-etal-2018-learning,
  title     = {Learning-based Composite Metrics for Improved Caption Evaluation},
  author    = {Sharif, Naeha and
               White, Lyndon and
               Bennamoun, Mohammed and
               Ali Shah, Syed Afaq},
  editor    = {Shwartz, Vered and
               Tabassum, Jeniya and
               Voigt, Rob and
               Che, Wanxiang and
               de Marneffe, Marie-Catherine and
               Nissim, Malvina},
  booktitle = {Proceedings of {ACL} 2018, Student Research Workshop},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P18-3003},
  doi       = {10.18653/v1/P18-3003},
  pages     = {14--20},
  abstract  = {The evaluation of image caption quality is a challenging task, which requires the assessment of two main aspects in a caption: adequacy and fluency. These quality aspects can be judged using a combination of several linguistic features. However, most of the current image captioning metrics focus only on specific linguistic facets, such as the lexical or semantic, and fail to meet a satisfactory level of correlation with human judgements at the sentence-level. We propose a learning-based framework to incorporate the scores of a set of lexical and semantic metrics as features, to capture the adequacy and fluency of captions at different linguistic levels. Our experimental results demonstrate that composite metrics draw upon the strengths of stand-alone measures to yield improved correlation and accuracy.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID sharif-etal-2018-learning. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sharif-etal-2018-learning">
<!-- Title of the paper itself (the proceedings title is in the host relatedItem below). -->
<titleInfo>
<title>Learning-based Composite Metrics for Improved Caption Evaluation</title>
</titleInfo>
<!-- Authors: one <name> element per person, each with roleTerm "author". -->
<name type="personal">
<namePart type="given">Naeha</namePart>
<namePart type="family">Sharif</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lyndon</namePart>
<namePart type="family">White</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Bennamoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Two given-name parts and a two-word family name ("Ali Shah"). -->
<name type="personal">
<namePart type="given">Syed</namePart>
<namePart type="given">Afaq</namePart>
<namePart type="family">Ali Shah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, year-month precision. -->
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its title, editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ACL 2018, Student Research Workshop</title>
</titleInfo>
<!-- Editors of the proceedings: one <name> element per person, each with roleTerm "editor". -->
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeniya</namePart>
<namePart type="family">Tabassum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">Voigt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<!-- Publisher and place recorded for the proceedings volume. -->
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The evaluation of image caption quality is a challenging task, which requires the assessment of two main aspects in a caption: adequacy and fluency. These quality aspects can be judged using a combination of several linguistic features. However, most of the current image captioning metrics focus only on specific linguistic facets, such as the lexical or semantic, and fail to meet a satisfactory level of correlation with human judgements at the sentence-level. We propose a learning-based framework to incorporate the scores of a set of lexical and semantic metrics as features, to capture the adequacy and fluency of captions at different linguistic levels. Our experimental results demonstrate that composite metrics draw upon the strengths of stand-alone measures to yield improved correlation and accuracy.</abstract>
<!-- Identifiers: the citekey repeats the mods ID above; the DOI is stored bare, without a resolver prefix. -->
<identifier type="citekey">sharif-etal-2018-learning</identifier>
<identifier type="doi">10.18653/v1/P18-3003</identifier>
<location>
<url>https://aclanthology.org/P18-3003</url>
</location>
<!-- Position within the host volume: date plus first and last page. -->
<part>
<date>2018-07</date>
<extent unit="page">
<start>14</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning-based Composite Metrics for Improved Caption Evaluation
%A Sharif, Naeha
%A White, Lyndon
%A Bennamoun, Mohammed
%A Ali Shah, Syed Afaq
%Y Shwartz, Vered
%Y Tabassum, Jeniya
%Y Voigt, Rob
%Y Che, Wanxiang
%Y de Marneffe, Marie-Catherine
%Y Nissim, Malvina
%S Proceedings of ACL 2018, Student Research Workshop
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F sharif-etal-2018-learning
%X The evaluation of image caption quality is a challenging task, which requires the assessment of two main aspects in a caption: adequacy and fluency. These quality aspects can be judged using a combination of several linguistic features. However, most of the current image captioning metrics focus only on specific linguistic facets, such as the lexical or semantic, and fail to meet a satisfactory level of correlation with human judgements at the sentence-level. We propose a learning-based framework to incorporate the scores of a set of lexical and semantic metrics as features, to capture the adequacy and fluency of captions at different linguistic levels. Our experimental results demonstrate that composite metrics draw upon the strengths of stand-alone measures to yield improved correlation and accuracy.
%R 10.18653/v1/P18-3003
%U https://aclanthology.org/P18-3003
%U https://doi.org/10.18653/v1/P18-3003
%P 14-20
Markdown (Informal)
[Learning-based Composite Metrics for Improved Caption Evaluation](https://aclanthology.org/P18-3003) (Sharif et al., ACL 2018)
ACL