% Workshop paper by Andrea Ceolin in the VarDial 2021 proceedings (record hosted at aclanthology.org).
@inproceedings{ceolin-2021-comparing,
    title     = {Comparing the Performance of {CNN}s and Shallow Models for Language Identification},
    author    = {Ceolin, Andrea},
    editor    = {Zampieri, Marcos and
                 Nakov, Preslav and
                 Ljube{\v{s}}i{\'c}, Nikola and
                 Tiedemann, J{\"o}rg and
                 Scherrer, Yves and
                 Jauhiainen, Tommi},
    booktitle = {Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects},
    month     = apr,
    year      = {2021},
    address   = {Kyiv, Ukraine},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.vardial-1.12},
    pages     = {102--112},
    abstract  = {In this work we compare the performance of convolutional neural networks and shallow models on three out of the four language identification shared tasks proposed in the VarDial Evaluation Campaign 2021. In our experiments, convolutional neural networks and shallow models yielded comparable performance in the Romanian Dialect Identification (RDI) and the Dravidian Language Identification (DLI) shared tasks, after the training data was augmented, while an ensemble of support vector machines and Na{\"\i}ve Bayes models was the best performing model in the Uralic Language Identification (ULI) task. While the deep learning models did not achieve state-of-the-art performance at the tasks and tended to overfit the data, the ensemble method was one of two methods that beat the existing baseline for the first track of the ULI shared task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ceolin-2021-comparing">
<titleInfo>
<title>Comparing the Performance of CNNs and Shallow Models for Language Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Ceolin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work we compare the performance of convolutional neural networks and shallow models on three out of the four language identification shared tasks proposed in the VarDial Evaluation Campaign 2021. In our experiments, convolutional neural networks and shallow models yielded comparable performance in the Romanian Dialect Identification (RDI) and the Dravidian Language Identification (DLI) shared tasks, after the training data was augmented, while an ensemble of support vector machines and Naïve Bayes models was the best performing model in the Uralic Language Identification (ULI) task. While the deep learning models did not achieve state-of-the-art performance at the tasks and tended to overfit the data, the ensemble method was one of two methods that beat the existing baseline for the first track of the ULI shared task.</abstract>
<identifier type="citekey">ceolin-2021-comparing</identifier>
<location>
<url>https://aclanthology.org/2021.vardial-1.12</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>102</start>
<end>112</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing the Performance of CNNs and Shallow Models for Language Identification
%A Ceolin, Andrea
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%S Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv, Ukraine
%F ceolin-2021-comparing
%X In this work we compare the performance of convolutional neural networks and shallow models on three out of the four language identification shared tasks proposed in the VarDial Evaluation Campaign 2021. In our experiments, convolutional neural networks and shallow models yielded comparable performance in the Romanian Dialect Identification (RDI) and the Dravidian Language Identification (DLI) shared tasks, after the training data was augmented, while an ensemble of support vector machines and Naïve Bayes models was the best performing model in the Uralic Language Identification (ULI) task. While the deep learning models did not achieve state-of-the-art performance at the tasks and tended to overfit the data, the ensemble method was one of two methods that beat the existing baseline for the first track of the ULI shared task.
%U https://aclanthology.org/2021.vardial-1.12
%P 102-112
Markdown (Informal)
[Comparing the Performance of CNNs and Shallow Models for Language Identification](https://aclanthology.org/2021.vardial-1.12) (Ceolin, VarDial 2021)
ACL