{"id":"https://openalex.org/W4375869205","doi":"https://doi.org/10.1109/icassp49357.2023.10096228","title":"UML: A Universal Monolingual Output Layer For Multilingual Asr","display_name":"UML: A Universal Monolingual Output Layer For Multilingual Asr","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869205","doi":"https://doi.org/10.1109/icassp49357.2023.10096228"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096228","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp49357.2023.10096228","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100460206","display_name":"Chao Zhang","orcid":"https://orcid.org/0000-0002-7730-5131"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chao Zhang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374448","display_name":"Bo Li","orcid":"https://orcid.org/0000-0002-6711-3603"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001306222","display_name":"Shuo-Yiin Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuo-Yiin Chang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.824,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.999754,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8710176},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.71060926},{"id":"https://openalex.org/C145644426","wikidata":"https://www.wikidata.org/wiki/Q169411","display_name":"Unified Modeling Language","level":3,"score":0.6691216},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.6386401},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.6156269},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5511812},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47625566},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40653455},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36283624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35344738},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32716092},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.15520212},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1447475},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096228","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.11186","pdf_url":"https://arxiv.org/pdf/2302.11186","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096228","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.71}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":34,"referenced_works":["https://openalex.org/W1506752962","https://openalex.org/W1526974435","https://openalex.org/W1978660892","https://openalex.org/W1992912377","https://openalex.org/W2019973879","https://openalex.org/W2025198378","https://openalex.org/W2067093722","https://openalex.org/W2106440210","https://openalex.org/W2121879602","https://openalex.org/W2786835190","https://openalex.org/W2799800213","https://openalex.org/W2962704885","https://openalex.org/W2962784628","https://openalex.org/W2963414781","https://openalex.org/W2963431393","https://openalex.org/W2963979492","https://openalex.org/W2964309797","https://openalex.org/W2972417954","https://openalex.org/W3015877095","https://openalex.org/W3094242422","https://openalex.org/W3096032230","https://openalex.org/W3096215352","https://openalex.org/W3137976833","https://openalex.org/W3160201895","https://openalex.org/W3161375121","https://openalex.org/W3163203022","https://openalex.org/W3178647810","https://openalex.org/W3195874849","https://openalex.org/W4210463634","https://openalex.org/W4221151615","https://openalex.org/W4225295099","https://openalex.org/W4297841565","https://openalex.org/W4297841598","https://openalex.org/W4319862468"],"related_works":["https://openalex.org/W4394391052","https://openalex.org/W4384345686","https://openalex.org/W4242899858","https://openalex.org/W2780629932","https://openalex.org/W262455470","https://openalex.org/W2577782152","https://openalex.org/W2547618384","https://openalex.org/W2357937784","https://openalex.org/W2145834781","https://openalex.org/W2046765700"],"abstract_inverted_index":{"Word-piece":[0],"models":[1],"(WPMs)":[2],"are":[3],"commonly":[4],"used":[5],"subword":[6],"units":[7],"in":[8,23,82,97],"state-of-the-art":[9],"end-to-end":[10],"automatic":[11],"speech":[12],"recognition":[13],"(ASR)":[14],"systems.":[15],"For":[16],"multilingual":[17,28,130],"ASR,":[18],"due":[19],"to":[20,41,55,95],"the":[21,31,92,98,106,109,121],"differences":[22],"written":[24],"scripts":[25],"across":[26,89],"languages,":[27],"WPMs":[29],"bring":[30],"challenges":[32],"of":[33,60,100,108,123],"having":[34],"overly":[35],"large":[36],"output":[37,52,62,71,86,102],"layers":[38],"and":[39,80,128],"scaling":[40],"more":[42],"languages.":[43,90],"In":[44],"this":[45],"work,":[46],"we":[47],"propose":[48],"a":[49,83],"universal":[50],"monolingual":[51,85],"layer":[53,87],"(UML)":[54],"address":[56],"such":[57],"problems.":[58],"Instead":[59],"one":[61,66,76],"node":[63,72,103],"for":[64,77,126],"only":[65],"WPM,":[67],"UML":[68,93,125],"re-associates":[69],"each":[70,78,101],"with":[73],"multiple":[74],"WPMs,":[75],"language,":[79],"results":[81,113],"smaller":[84],"shared":[88],"Consequently,":[91],"enables":[94],"switch":[96],"interpretation":[99],"depending":[104],"on":[105,114],"language":[107],"input":[110],"speech.":[111],"Experimental":[112],"an":[115],"11-language":[116],"voice":[117],"search":[118],"task":[119],"demonstrated":[120],"feasibility":[122],"using":[124],"high-quality":[127],"high-efficiency":[129],"streaming":[131],"ASR.":[132]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4375869205","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-01-01T22:35:43.119259","created_date":"2023-05-10"}