@inproceedings{andrews-etal-2017-bayesian,
title = "{B}ayesian Modeling of Lexical Resources for Low-Resource Settings",
author = "Andrews, Nicholas and
Dredze, Mark and
Van Durme, Benjamin and
Eisner, Jason",
editor = "Barzilay, Regina and
Kan, Min-Yen",
booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P17-1095",
doi = "10.18653/v1/P17-1095",
pages = "1029--1039",
abstract = "Lexical resources such as dictionaries and gazetteers are often used as auxiliary data for tasks such as part-of-speech induction and named-entity recognition. However, discriminative training with lexical features requires annotated data to reliably estimate the lexical feature weights and may result in overfitting the lexical features at the expense of features which generalize better. In this paper, we investigate a more robust approach: we stipulate that the lexicon is the result of an assumed generative process. Practically, this means that we may treat the lexical resources as observations under the proposed generative model. The lexical resources provide training data for the generative model without requiring separate data to estimate lexical feature weights. We evaluate the proposed approach in two settings: part-of-speech induction and low-resource named-entity recognition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="andrews-etal-2017-bayesian">
<titleInfo>
<title>Bayesian Modeling of Lexical Resources for Low-Resource Settings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Andrews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Van Durme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Eisner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Barzilay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lexical resources such as dictionaries and gazetteers are often used as auxiliary data for tasks such as part-of-speech induction and named-entity recognition. However, discriminative training with lexical features requires annotated data to reliably estimate the lexical feature weights and may result in overfitting the lexical features at the expense of features which generalize better. In this paper, we investigate a more robust approach: we stipulate that the lexicon is the result of an assumed generative process. Practically, this means that we may treat the lexical resources as observations under the proposed generative model. The lexical resources provide training data for the generative model without requiring separate data to estimate lexical feature weights. We evaluate the proposed approach in two settings: part-of-speech induction and low-resource named-entity recognition.</abstract>
<identifier type="citekey">andrews-etal-2017-bayesian</identifier>
<identifier type="doi">10.18653/v1/P17-1095</identifier>
<location>
<url>https://aclanthology.org/P17-1095</url>
</location>
<part>
<date>2017-07</date>
<extent unit="page">
<start>1029</start>
<end>1039</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bayesian Modeling of Lexical Resources for Low-Resource Settings
%A Andrews, Nicholas
%A Dredze, Mark
%A Van Durme, Benjamin
%A Eisner, Jason
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F andrews-etal-2017-bayesian
%X Lexical resources such as dictionaries and gazetteers are often used as auxiliary data for tasks such as part-of-speech induction and named-entity recognition. However, discriminative training with lexical features requires annotated data to reliably estimate the lexical feature weights and may result in overfitting the lexical features at the expense of features which generalize better. In this paper, we investigate a more robust approach: we stipulate that the lexicon is the result of an assumed generative process. Practically, this means that we may treat the lexical resources as observations under the proposed generative model. The lexical resources provide training data for the generative model without requiring separate data to estimate lexical feature weights. We evaluate the proposed approach in two settings: part-of-speech induction and low-resource named-entity recognition.
%R 10.18653/v1/P17-1095
%U https://aclanthology.org/P17-1095
%U https://doi.org/10.18653/v1/P17-1095
%P 1029-1039
Markdown (Informal)
[Bayesian Modeling of Lexical Resources for Low-Resource Settings](https://aclanthology.org/P17-1095) (Andrews et al., ACL 2017)
ACL
- Nicholas Andrews, Mark Dredze, Benjamin Van Durme, and Jason Eisner. 2017. Bayesian Modeling of Lexical Resources for Low-Resource Settings. In Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1029–1039, Vancouver, Canada. Association for Computational Linguistics.