{"id":"https://openalex.org/W4391013270","doi":"https://doi.org/10.48550/arxiv.2401.08992","title":"Efficient Adapter Finetuning for Tail Languages in Streaming Multilingual ASR","display_name":"Efficient Adapter Finetuning for Tail Languages in Streaming Multilingual ASR","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391013270","doi":"https://doi.org/10.48550/arxiv.2401.08992"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.08992","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2401.08992","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002187986","display_name":"Junwen Bai","orcid":"https://orcid.org/0000-0001-7939-4927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Junwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374360","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-6709-0942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030757951","display_name":"Qiujia Li","orcid":"https://orcid.org/0000-0003-3074-3692"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Qiujia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sainath, Tara N.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Strohman, Trevor","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9897,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.7021164},{"id":"https://openalex.org/keywords/serialization","display_name":"Serialization","score":0.43157086}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8679577},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.7021164},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6896378},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.51635826},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.43157086},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42615646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37158948},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.348839},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16354},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.14139476},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.08992","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2401.08992","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.08992","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.75}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4321593827","https://openalex.org/W4306381730","https://openalex.org/W4231356583","https://openalex.org/W4229060448","https://openalex.org/W2899905671","https://openalex.org/W2376159383","https://openalex.org/W236169673","https://openalex.org/W2351439380","https://openalex.org/W2133028525","https://openalex.org/W1593760324"],"abstract_inverted_index":{"The":[0,123,157,170],"end-to-end":[1],"ASR":[2],"model":[3,82,132,142,159,171,224],"is":[4,14,136,144,173],"often":[5],"desired":[6],"in":[7,118],"the":[8,31,61,71,81,119,130,139,145,150,161,219,222,229],"streaming":[9,120],"multilingual":[10,121,178],"scenario":[11],"since":[12],"it":[13],"easier":[15],"to":[16,46,78,88,204],"deploy":[17],"and":[18,34,143,202],"can":[19,217],"benefit":[20],"from":[21,164],"pre-trained":[22],"speech":[23],"models":[24],"such":[25],"as":[26,67],"powerful":[27],"foundation":[28,141],"models.":[29],"Meanwhile,":[30],"heterogeneous":[32],"nature":[33],"imbalanced":[35],"data":[36,62],"abundance":[37],"of":[38,70,129,221],"different":[39,51,165,168],"languages":[40,52,117,185],"may":[41,64],"cause":[42],"performance":[43,49,172,232],"degradation,":[44],"leading":[45],"asynchronous":[47,230],"peak":[48,231],"for":[50,115,127,167],"during":[53,149],"training,":[54],"especially":[55],"on":[56,175,200,206],"tail":[57,116,184],"ones.":[58],"Sometimes":[59],"even":[60],"itself":[63],"become":[65],"unavailable":[66],"a":[68,106,176,207],"result":[69],"enhanced":[72,111],"privacy":[73],"protection.":[74],"Existing":[75],"work":[76],"tend":[77],"significantly":[79],"increase":[80],"size":[83],"or":[84],"learn":[85],"language-specific":[86],"decoders":[87],"accommodate":[89],"each":[90],"language":[91],"separately.":[92],"In":[93],"this":[94],"study,":[95],"we":[96,211],"explore":[97],"simple":[98],"yet":[99],"effective":[100],"Language-Dependent":[101],"Adapter":[102],"(LDA)":[103],"finetuning":[104,151],"under":[105],"cascaded":[107],"Conformer":[108],"transducer":[109],"framework":[110],"by":[112],"teacher":[113],"pseudo-labeling":[114],"ASR.":[122],"adapter":[124,162],"only":[125,146],"accounts":[126],"0.4%":[128],"full":[131,223],"per":[133],"language.":[134],"It":[135],"plugged":[137],"into":[138],"frozen":[140],"trainable":[147],"module":[148],"process":[152],"with":[153],"noisy":[154],"student":[155],"training.":[156],"final":[158],"merges":[160],"parameters":[163],"checkpoints":[166],"languages.":[169],"validated":[174],"challenging":[177],"dictation":[179],"dataset,":[180],"which":[181],"includes":[182],"39":[183],"across":[186],"Latin,":[187],"Greek,":[188],"Arabic,":[189],"etc.":[190],"Our":[191],"proposed":[192],"method":[193],"brings":[194],"12.2%":[195],"word":[196],"error":[197],"rate":[198],"reduction":[199],"average":[201],"up":[203],"37.5%":[205],"single":[208],"locale.":[209],"Furthermore,":[210],"show":[212],"that":[213],"our":[214],"parameter-efficient":[215],"LDA":[216],"match":[218],"quality":[220],"finetuning,":[225],"thus":[226],"greatly":[227],"alleviating":[228],"issue.":[233]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391013270","counts_by_year":[],"updated_date":"2024-12-13T03:26:11.906353","created_date":"2024-01-19"}