@inproceedings{huzaifah-etal-2023-i2rs,
    author    = {Huzaifah, Muhammad and Min Tan, Kye and Duan, Richeng},
    editor    = {Salesky, Elizabeth and Federico, Marcello and Carpuat, Marine},
    title     = {{I}2{R}{'}s End-to-End Speech Translation System for {IWSLT} 2023 Offline Shared Task},
    booktitle = {Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)},
    month     = jul,
    year      = {2023},
    address   = {Toronto, Canada (in-person and online)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.iwslt-1.16},
    doi       = {10.18653/v1/2023.iwslt-1.16},
    pages     = {202--210},
    abstract  = {This paper describes I2R{'}s submission to the offline speech translation track for IWSLT 2023. We focus on an end-to-end approach for translation from English audio to German text, one of the three available language directions in this year{'}s edition. The I2R system leverages on pretrained models that have been exposed to large-scale audio and text data for our base model. We introduce several stages of additional pretraining followed by fine-tuning to adapt the system for the downstream speech translation task. The strategy is supplemented by other techniques such as data augmentation, domain tagging, knowledge distillation, and model ensemble, among others. We evaluate the system on several publicly available test sets for comparison.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huzaifah-etal-2023-i2rs">
<titleInfo>
<title>I2R’s End-to-End Speech Translation System for IWSLT 2023 Offline Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Huzaifah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kye</namePart>
<namePart type="family">Min Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richeng</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes I2R’s submission to the offline speech translation track for IWSLT 2023. We focus on an end-to-end approach for translation from English audio to German text, one of the three available language directions in this year’s edition. The I2R system leverages on pretrained models that have been exposed to large-scale audio and text data for our base model. We introduce several stages of additional pretraining followed by fine-tuning to adapt the system for the downstream speech translation task. The strategy is supplemented by other techniques such as data augmentation, domain tagging, knowledge distillation, and model ensemble, among others. We evaluate the system on several publicly available test sets for comparison.</abstract>
<identifier type="citekey">huzaifah-etal-2023-i2rs</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.16</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>202</start>
<end>210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T I2R’s End-to-End Speech Translation System for IWSLT 2023 Offline Shared Task
%A Huzaifah, Muhammad
%A Min Tan, Kye
%A Duan, Richeng
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F huzaifah-etal-2023-i2rs
%X This paper describes I2R’s submission to the offline speech translation track for IWSLT 2023. We focus on an end-to-end approach for translation from English audio to German text, one of the three available language directions in this year’s edition. The I2R system leverages on pretrained models that have been exposed to large-scale audio and text data for our base model. We introduce several stages of additional pretraining followed by fine-tuning to adapt the system for the downstream speech translation task. The strategy is supplemented by other techniques such as data augmentation, domain tagging, knowledge distillation, and model ensemble, among others. We evaluate the system on several publicly available test sets for comparison.
%R 10.18653/v1/2023.iwslt-1.16
%U https://aclanthology.org/2023.iwslt-1.16
%U https://doi.org/10.18653/v1/2023.iwslt-1.16
%P 202-210
Markdown (Informal)
[I2R’s End-to-End Speech Translation System for IWSLT 2023 Offline Shared Task](https://aclanthology.org/2023.iwslt-1.16) (Huzaifah et al., IWSLT 2023)
ACL