@inproceedings{michel-etal-2017-geometry,
title = "Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations",
author = "Michel, Paul and
Ravichander, Abhilasha and
Rijhwani, Shruti",
editor = "Blunsom, Phil and
Bordes, Antoine and
Cho, Kyunghyun and
Cohen, Shay and
Dyer, Chris and
Grefenstette, Edward and
Hermann, Karl Moritz and
Rimell, Laura and
Weston, Jason and
Yih, Scott",
booktitle = "Proceedings of the 2nd Workshop on Representation Learning for {NLP}",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2628",
doi = "10.18653/v1/W17-2628",
pages = "235--240",
abstract = "We investigate the pertinence of methods from algebraic topology for text data analysis. These methods enable the development of mathematically-principled isometric-invariant mappings from a set of vectors to a document embedding, which is stable with respect to the geometry of the document in the selected metric space. In this work, we evaluate the utility of these topology-based document representations in traditional NLP tasks, specifically document clustering and sentiment classification. We find that the embeddings do not benefit text analysis. In fact, performance is worse than simple techniques like tf-idf, indicating that the geometry of the document does not provide enough variability for classification on the basis of topic or sentiment in the chosen datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="michel-etal-2017-geometry">
<titleInfo>
<title>Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Michel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhilasha</namePart>
<namePart type="family">Ravichander</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bordes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyunghyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shay</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Dyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Grefenstette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karl</namePart>
<namePart type="given">Moritz</namePart>
<namePart type="family">Hermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Rimell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Weston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the pertinence of methods from algebraic topology for text data analysis. These methods enable the development of mathematically-principled isometric-invariant mappings from a set of vectors to a document embedding, which is stable with respect to the geometry of the document in the selected metric space. In this work, we evaluate the utility of these topology-based document representations in traditional NLP tasks, specifically document clustering and sentiment classification. We find that the embeddings do not benefit text analysis. In fact, performance is worse than simple techniques like tf-idf, indicating that the geometry of the document does not provide enough variability for classification on the basis of topic or sentiment in the chosen datasets.</abstract>
<identifier type="citekey">michel-etal-2017-geometry</identifier>
<identifier type="doi">10.18653/v1/W17-2628</identifier>
<location>
<url>https://aclanthology.org/W17-2628</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>235</start>
<end>240</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations
%A Michel, Paul
%A Ravichander, Abhilasha
%A Rijhwani, Shruti
%Y Blunsom, Phil
%Y Bordes, Antoine
%Y Cho, Kyunghyun
%Y Cohen, Shay
%Y Dyer, Chris
%Y Grefenstette, Edward
%Y Hermann, Karl Moritz
%Y Rimell, Laura
%Y Weston, Jason
%Y Yih, Scott
%S Proceedings of the 2nd Workshop on Representation Learning for NLP
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F michel-etal-2017-geometry
%X We investigate the pertinence of methods from algebraic topology for text data analysis. These methods enable the development of mathematically-principled isometric-invariant mappings from a set of vectors to a document embedding, which is stable with respect to the geometry of the document in the selected metric space. In this work, we evaluate the utility of these topology-based document representations in traditional NLP tasks, specifically document clustering and sentiment classification. We find that the embeddings do not benefit text analysis. In fact, performance is worse than simple techniques like tf-idf, indicating that the geometry of the document does not provide enough variability for classification on the basis of topic or sentiment in the chosen datasets.
%R 10.18653/v1/W17-2628
%U https://aclanthology.org/W17-2628
%U https://doi.org/10.18653/v1/W17-2628
%P 235-240
Markdown (Informal)
[Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations](https://aclanthology.org/W17-2628) (Michel et al., RepL4NLP 2017)
ACL