@inproceedings{qiu-etal-2020-concise,
title = "A Concise Model for Multi-Criteria {C}hinese Word Segmentation with Transformer Encoder",
author = "Qiu, Xipeng and
Pei, Hengzhi and
Yan, Hang and
Huang, Xuanjing",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.260/",
doi = "10.18653/v1/2020.findings-emnlp.260",
pages = "2887--2897",
abstract = "Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qiu-etal-2020-concise">
<titleInfo>
<title>A Concise Model for Multi-Criteria Chinese Word Segmentation with Transformer Encoder</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hengzhi</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hang</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github.</abstract>
<identifier type="citekey">qiu-etal-2020-concise</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.260</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.260/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>2887</start>
<end>2897</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Concise Model for Multi-Criteria Chinese Word Segmentation with Transformer Encoder
%A Qiu, Xipeng
%A Pei, Hengzhi
%A Yan, Hang
%A Huang, Xuanjing
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F qiu-etal-2020-concise
%X Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github.
%R 10.18653/v1/2020.findings-emnlp.260
%U https://aclanthology.org/2020.findings-emnlp.260/
%U https://doi.org/10.18653/v1/2020.findings-emnlp.260
%P 2887-2897
Markdown (Informal)
[A Concise Model for Multi-Criteria Chinese Word Segmentation with Transformer Encoder](https://aclanthology.org/2020.findings-emnlp.260/) (Qiu et al., Findings 2020)
ACL