{"id":"https://openalex.org/W4223988178","doi":"https://doi.org/10.21437/interspeech.2022-10791","title":"A Unified Cascaded Encoder ASR Model for Dynamic Model Sizes","display_name":"A Unified Cascaded Encoder ASR Model for Dynamic Model Sizes","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4223988178","doi":"https://doi.org/10.21437/interspeech.2022-10791"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10791","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2204.06164","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058886181","display_name":"Shaojin Ding","orcid":"https://orcid.org/0000-0002-2108-3111"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shaojin Ding","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101432591","display_name":"Weiran Wang","orcid":"https://orcid.org/0009-0000-0843-707X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang Weiran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037644321","display_name":"Ding Zhao","orcid":"https://orcid.org/0000-0002-9400-8446"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding Zhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tara Sainath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319167","display_name":"Yanzhang He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanzhang He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062218486","display_name":"Robert David","orcid":"https://orcid.org/0000-0002-3244-5341"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008863473","display_name":"Rami Botros","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rami Botros","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085651536","display_name":"Rina Panigrahy\u200e","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rina Panigrahy\u200e","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026297587","display_name":"Qiao Liang","orcid":"https://orcid.org/0000-0003-4464-4644"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014088530","display_name":"Dongseong Hwang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongseong Hwang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048236120","display_name":"Ian McGraw","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ian McGraw","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032640894","display_name":"Rohit Prabhavalkar","orcid":"https://orcid.org/0000-0001-5331-6058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rohit Prabhavalkar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.524,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.669771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":88,"max":90},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9933,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9871,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7504883},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.681102},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38575038},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33093685},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10791","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.06164","pdf_url":"http://arxiv.org/pdf/2204.06164","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.06164","pdf_url":"http://arxiv.org/pdf/2204.06164","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9,"id":"https://metadata.un.org/sdg/7"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2317200988","https://openalex.org/W2073681303","https://openalex.org/W2051487156"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,43],"propose":[4],"a":[5],"dynamic":[6,39],"cascaded":[7,40,110],"encoder":[8,41,73,111],"Automatic":[9],"Speech":[10],"Recognition":[11],"(ASR)":[12],"model,":[13,42],"which":[14],"unifies":[15,117],"models":[16,123],"for":[17,60],"different":[18],"deployment":[19,89],"scenarios.":[20],"Moreover,":[21],"the":[22,38,50,65,72,77,92,108,118,136],"model":[23,27,54,95,115],"can":[24],"significantly":[25],"reduce":[26],"size":[28,78,99,127],"and":[29,81,87,100,121],"power":[30,102],"consumption":[31,103],"without":[32],"loss":[33],"of":[34,52,79,139],"quality.":[35],"Namely,":[36],"with":[37,129],"explore":[44],"three":[45],"techniques":[46],"to":[47,70,84,107],"maximally":[48],"boost":[49],"performance":[51],"each":[53,61],"size:":[55],"1)":[56],"Use":[57,68],"separate":[58,141],"decoders":[59],"sub-model":[62],"while":[63,133],"sharing":[64],"encoders;":[66],"2)":[67],"funnel-pooling":[69],"improve":[71,85],"efficiency;":[74],"3)":[75],"Balance":[76],"causal":[80],"non-causal":[82],"encoders":[83],"quality":[86,131],"fit":[88],"constraints.":[90],"Overall,":[91],"proposed":[93],"large-medium":[94],"has":[96],"30%":[97],"smaller":[98],"reduces":[101],"by":[104],"33%,":[105],"compared":[106],"baseline":[109],"model.":[112],"The":[113],"triple-size":[114],"that":[116],"large,":[119],"medium,":[120],"small":[122],"achieves":[124],"37%":[125],"total":[126],"reduction":[128],"minimal":[130],"loss,":[132],"substantially":[134],"reducing":[135],"engineering":[137],"efforts":[138],"having":[140],"models.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4223988178","counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5}],"updated_date":"2024-12-30T13:22:53.489372","created_date":"2022-04-19"}