@inproceedings{ali-etal-2014-advances,
title = "Advances in dialectal {A}rabic speech recognition: a study using {T}witter to improve {E}gyptian {ASR}",
author = "Ali, Ahmed and
Mubarak, Hamdy and
Vogel, Stephan",
editor = {Federico, Marcello and
St{\"u}ker, Sebastian and
Yvon, Fran{\c{c}}ois},
booktitle = "Proceedings of the 11th International Workshop on Spoken Language Translation: Papers",
month = dec # " 4-5",
year = "2014",
address = "Lake Tahoe, California",
url = "https://aclanthology.org/2014.iwslt-papers.1/",
pages = "156--162",
abstract = "This paper reports results in building an Egyptian Arabic speech recognition system as an example for under-resourced languages. We investigated different approaches to build the system using 10 hours for training the acoustic model, and results for both grapheme system and phoneme system using MADA. The phoneme-based system shows better results than the grapheme-based system. In this paper, we explore the use of tweets written in dialectal Arabic. Using 880K Egyptian tweets reduced the Out Of Vocabulary (OOV) rate from 15.1{\%} to 3.2{\%} and the WER from 59.6{\%} to 44.7{\%}, a relative gain 25{\%} in WER."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ali-etal-2014-advances">
<titleInfo>
<title>Advances in dialectal Arabic speech recognition: a study using Twitter to improve Egyptian ASR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Vogel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-12-04/2014-12-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Lake Tahoe, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper reports results in building an Egyptian Arabic speech recognition system as an example for under-resourced languages. We investigated different approaches to build the system using 10 hours for training the acoustic model, and results for both grapheme system and phoneme system using MADA. The phoneme-based system shows better results than the grapheme-based system. In this paper, we explore the use of tweets written in dialectal Arabic. Using 880K Egyptian tweets reduced the Out Of Vocabulary (OOV) rate from 15.1% to 3.2% and the WER from 59.6% to 44.7%, a relative gain 25% in WER.</abstract>
<identifier type="citekey">ali-etal-2014-advances</identifier>
<location>
<url>https://aclanthology.org/2014.iwslt-papers.1/</url>
</location>
<part>
<date>2014-12-04/2014-12-05</date>
<extent unit="page">
<start>156</start>
<end>162</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advances in dialectal Arabic speech recognition: a study using Twitter to improve Egyptian ASR
%A Ali, Ahmed
%A Mubarak, Hamdy
%A Vogel, Stephan
%Y Federico, Marcello
%Y Stüker, Sebastian
%Y Yvon, François
%S Proceedings of the 11th International Workshop on Spoken Language Translation: Papers
%D 2014
%8 dec 4-5
%C Lake Tahoe, California
%F ali-etal-2014-advances
%X This paper reports results in building an Egyptian Arabic speech recognition system as an example for under-resourced languages. We investigated different approaches to build the system using 10 hours for training the acoustic model, and results for both grapheme system and phoneme system using MADA. The phoneme-based system shows better results than the grapheme-based system. In this paper, we explore the use of tweets written in dialectal Arabic. Using 880K Egyptian tweets reduced the Out Of Vocabulary (OOV) rate from 15.1% to 3.2% and the WER from 59.6% to 44.7%, a relative gain 25% in WER.
%U https://aclanthology.org/2014.iwslt-papers.1/
%P 156-162
Markdown (Informal)
[Advances in dialectal Arabic speech recognition: a study using Twitter to improve Egyptian ASR](https://aclanthology.org/2014.iwslt-papers.1/) (Ali et al., IWSLT 2014)
ACL