{"id":"https://openalex.org/W2972808286","doi":"https://doi.org/10.21437/interspeech.2019-2599","title":"Analyzing Phonetic and Graphemic Representations in End-to-End Automatic Speech Recognition","display_name":"Analyzing Phonetic and Graphemic Representations in End-to-End Automatic Speech Recognition","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972808286","doi":"https://doi.org/10.21437/interspeech.2019-2599","mag":"2972808286"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2599","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/1907.04224","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051184573","display_name":"Yonatan Belinkov","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonatan Belinkov","raw_affiliation_strings":["Harvard John A. Paulson School of Engineering and Applied Sciences, Cambridge, MA, USA","MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Harvard John A. Paulson School of Engineering and Applied Sciences, Cambridge, MA, USA","institution_ids":[]},{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693435","display_name":"Ahmed Ali","orcid":"https://orcid.org/0000-0002-9186-7544"},"institutions":[{"id":"https://openalex.org/I1301390666","display_name":"Qatar Airways (Qatar)","ror":"https://ror.org/01hx00y13","country_code":"QA","type":"company","lineage":["https://openalex.org/I1301390666"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Ahmed Ali","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU, Doha, Qatar"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU, Doha, Qatar","institution_ids":["https://openalex.org/I1301390666"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112758056","display_name":"James Glass","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Glass","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.19,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":29,"citation_normalized_percentile":{"value":0.881774,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"81","last_page":"85"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.7998318},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.59241587}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8454206},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.7998318},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6316635},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.60402715},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.59241587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5811313},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5086561},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49681595},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41083544},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11503616},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2599","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1907.04224","pdf_url":"http://arxiv.org/pdf/1907.04224","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1907.04224","pdf_url":"http://arxiv.org/pdf/1907.04224","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.77}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":30,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1736701665","https://openalex.org/W1961523962","https://openalex.org/W1990501283","https://openalex.org/W2193413348","https://openalex.org/W2250357346","https://openalex.org/W2295676751","https://openalex.org/W2327501763","https://openalex.org/W2402146185","https://openalex.org/W2511774920","https://openalex.org/W2515741950","https://openalex.org/W2586148577","https://openalex.org/W2586568968","https://openalex.org/W2748318213","https://openalex.org/W2752168051","https://openalex.org/W2756127416","https://openalex.org/W2758849341","https://openalex.org/W2888760383","https://openalex.org/W2888995442","https://openalex.org/W2899073901","https://openalex.org/W2900092534","https://openalex.org/W2906152891","https://openalex.org/W2953190524","https://openalex.org/W2962753610","https://openalex.org/W2962799131","https://openalex.org/W2963077089","https://openalex.org/W2963430224","https://openalex.org/W2963540920","https://openalex.org/W2964060510","https://openalex.org/W3105148948"],"related_works":["https://openalex.org/W4220683390","https://openalex.org/W2776838583","https://openalex.org/W2423284978","https://openalex.org/W2359469050","https://openalex.org/W2355751417","https://openalex.org/W2350724007","https://openalex.org/W2183593636","https://openalex.org/W2083922162","https://openalex.org/W2000075989","https://openalex.org/W1607396156"],"abstract_inverted_index":{"End-to-end":[0],"neural":[1,56,141],"network":[2,57,76],"systems":[3],"for":[4,26],"automatic":[5],"speech":[6],"recognition":[7],"(ASR)":[8],"are":[9,59,133],"trained":[10],"from":[11],"acoustic":[12,27],"features":[13],"to":[14,18,67],"text":[15],"transcriptions.In":[16],"contrast":[17],"modular":[19],"ASR":[20,95],"systems,":[21],"which":[22],"contain":[23],"separately-trained":[24],"components":[25],"modeling,":[28,33],"pronunciation":[29],"lexicon,":[30],"and":[31,41,77,109,121,123],"language":[32],"the":[34,43,48,52,69,75,88,98,139],"end-to-end":[35,94],"paradigm":[36],"is":[37,63],"both":[38],"conceptually":[39],"simpler":[40],"has":[42],"potential":[44],"benefit":[45],"of":[46,71,74,103,138],"training":[47],"entire":[49],"system":[50],"on":[51],"end":[53],"task.However,":[54],"such":[55],"models":[58],"more":[60],"opaque:":[61],"it":[62,80],"not":[64],"clear":[65],"how":[66,130],"interpret":[68],"role":[70],"different":[72,114,131,136],"parts":[73],"what":[78],"information":[79],"learns":[81],"during":[82],"training.In":[83],"this":[84],"paper,":[85],"we":[86],"analyze":[87],"learned":[89],"internal":[90],"representations":[91],"in":[92,101,129,135],"an":[93],"model.We":[96],"evaluate":[97],"representation":[99],"quality":[100],"terms":[102],"several":[104],"classification":[105],"tasks,":[106],"comparing":[107],"phonemes":[108],"graphemes,":[110],"as":[111,113],"well":[112],"articulatory":[115],"features.We":[116],"study":[117],"two":[118],"languages":[119],"(English":[120],"Arabic)":[122],"three":[124],"datasets,":[125],"finding":[126],"remarkable":[127],"consistency":[128],"properties":[132],"represented":[134],"layers":[137],"deep":[140],"network.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2972808286","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":1}],"updated_date":"2025-01-22T10:40:25.040328","created_date":"2019-09-19"}