@inproceedings{mukherjee-etal-2017-creating,
title = "Creating {POS} Tagging and Dependency Parsing Experts via Topic Modeling",
author = {Mukherjee, Atreyee and
K{\"u}bler, Sandra and
Scheutz, Matthias},
editor = "Lapata, Mirella and
Blunsom, Phil and
Koller, Alexander",
booktitle = "Proceedings of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics: Volume 1, Long Papers",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/E17-1033",
pages = "347--355",
abstract = "Part of speech (POS) taggers and dependency parsers tend to work well on homogeneous datasets but their performance suffers on datasets containing data from different genres. In our current work, we investigate how to create POS tagging and dependency parsing experts for heterogeneous data by employing topic modeling. We create topic models (using Latent Dirichlet Allocation) to determine genres from a heterogeneous dataset and then train an expert for each of the genres. Our results show that the topic modeling experts reach substantial improvements when compared to the general versions. For dependency parsing, the improvement reaches 2 percent points over the full training baseline when we use two topics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mukherjee-etal-2017-creating">
<titleInfo>
<title>Creating POS Tagging and Dependency Parsing Experts via Topic Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atreyee</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kübler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Scheutz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Koller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Part of speech (POS) taggers and dependency parsers tend to work well on homogeneous datasets but their performance suffers on datasets containing data from different genres. In our current work, we investigate how to create POS tagging and dependency parsing experts for heterogeneous data by employing topic modeling. We create topic models (using Latent Dirichlet Allocation) to determine genres from a heterogeneous dataset and then train an expert for each of the genres. Our results show that the topic modeling experts reach substantial improvements when compared to the general versions. For dependency parsing, the improvement reaches 2 percent points over the full training baseline when we use two topics.</abstract>
<identifier type="citekey">mukherjee-etal-2017-creating</identifier>
<location>
<url>https://aclanthology.org/E17-1033</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>347</start>
<end>355</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Creating POS Tagging and Dependency Parsing Experts via Topic Modeling
%A Mukherjee, Atreyee
%A Kübler, Sandra
%A Scheutz, Matthias
%Y Lapata, Mirella
%Y Blunsom, Phil
%Y Koller, Alexander
%S Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F mukherjee-etal-2017-creating
%X Part of speech (POS) taggers and dependency parsers tend to work well on homogeneous datasets but their performance suffers on datasets containing data from different genres. In our current work, we investigate how to create POS tagging and dependency parsing experts for heterogeneous data by employing topic modeling. We create topic models (using Latent Dirichlet Allocation) to determine genres from a heterogeneous dataset and then train an expert for each of the genres. Our results show that the topic modeling experts reach substantial improvements when compared to the general versions. For dependency parsing, the improvement reaches 2 percent points over the full training baseline when we use two topics.
%U https://aclanthology.org/E17-1033
%P 347-355
Markdown (Informal)
[Creating POS Tagging and Dependency Parsing Experts via Topic Modeling](https://aclanthology.org/E17-1033) (Mukherjee et al., EACL 2017)
ACL