@inproceedings{kumar-etal-2024-beyond,
title = "Beyond Common Words: Enhancing {ASR} Cross-Lingual Proper Noun Recognition Using Large Language Models",
author = "Kumar, Rishabh and
Ghosh, Sabyasachi and
Ramakrishnan, Ganesh",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.399/",
doi = "10.18653/v1/2024.findings-emnlp.399",
pages = "6821--6828",
abstract = "In this work, we address the challenge of cross-lingual proper noun recognition in automatic speech recognition (ASR), where proper nouns in an utterance may originate from a language different from the language in which the ASR system is trained. We enhance the performance of end-to-end ASR systems by instructing a large language model (LLM) to correct the ASR model`s predictions. The LLM`s context is augmented with a dictionary of cross-lingual words that are phonetically and graphemically similar to the potentially incorrect proper nouns in the ASR predictions. Our dictionary-based method DiP-ASR (Dictionary-based Prompting for Automatic Speech Recognition) significantly reduces word error rates compared to both the end-to-end ASR baseline and instruction-based prompting of the LLM without the dictionary across cross-lingual proper noun recognition tasks involving three secondary languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2024-beyond">
<titleInfo>
<title>Beyond Common Words: Enhancing ASR Cross-Lingual Proper Noun Recognition Using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabyasachi</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ganesh</namePart>
<namePart type="family">Ramakrishnan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we address the challenge of cross-lingual proper noun recognition in automatic speech recognition (ASR), where proper nouns in an utterance may originate from a language different from the language in which the ASR system is trained. We enhance the performance of end-to-end ASR systems by instructing a large language model (LLM) to correct the ASR model‘s predictions. The LLM‘s context is augmented with a dictionary of cross-lingual words that are phonetically and graphemically similar to the potentially incorrect proper nouns in the ASR predictions. Our dictionary-based method DiP-ASR (Dictionary-based Prompting for Automatic Speech Recognition) significantly reduces word error rates compared to both the end-to-end ASR baseline and instruction-based prompting of the LLM without the dictionary across cross-lingual proper noun recognition tasks involving three secondary languages.</abstract>
<identifier type="citekey">kumar-etal-2024-beyond</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.399</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.399/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>6821</start>
<end>6828</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Common Words: Enhancing ASR Cross-Lingual Proper Noun Recognition Using Large Language Models
%A Kumar, Rishabh
%A Ghosh, Sabyasachi
%A Ramakrishnan, Ganesh
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F kumar-etal-2024-beyond
%X In this work, we address the challenge of cross-lingual proper noun recognition in automatic speech recognition (ASR), where proper nouns in an utterance may originate from a language different from the language in which the ASR system is trained. We enhance the performance of end-to-end ASR systems by instructing a large language model (LLM) to correct the ASR model‘s predictions. The LLM‘s context is augmented with a dictionary of cross-lingual words that are phonetically and graphemically similar to the potentially incorrect proper nouns in the ASR predictions. Our dictionary-based method DiP-ASR (Dictionary-based Prompting for Automatic Speech Recognition) significantly reduces word error rates compared to both the end-to-end ASR baseline and instruction-based prompting of the LLM without the dictionary across cross-lingual proper noun recognition tasks involving three secondary languages.
%R 10.18653/v1/2024.findings-emnlp.399
%U https://aclanthology.org/2024.findings-emnlp.399/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.399
%P 6821-6828
Markdown (Informal)
[Beyond Common Words: Enhancing ASR Cross-Lingual Proper Noun Recognition Using Large Language Models](https://aclanthology.org/2024.findings-emnlp.399/) (Kumar et al., Findings 2024)
ACL