{"id":"https://openalex.org/W4372346241","doi":"https://doi.org/10.1109/icassp49357.2023.10094903","title":"From English to More Languages: Parameter-Efficient Model Reprogramming for Cross-Lingual Speech Recognition","display_name":"From English to More Languages: Parameter-Efficient Model Reprogramming for Cross-Lingual Speech Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372346241","doi":"https://doi.org/10.1109/icassp49357.2023.10094903"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094903","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp49357.2023.10094903","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020376803","display_name":"Chao-Han Huck Yang","orcid":"https://orcid.org/0000-0003-2879-8811"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chao-Han Huck Yang","raw_affiliation_strings":["Georgia Institute of Technology, USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Georgia Institute of Technology, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374448","display_name":"Bo Li","orcid":"https://orcid.org/0000-0002-6711-3603"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049929716","display_name":"Nanxin Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nanxin Chen","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032640894","display_name":"Rohit Prabhavalkar","orcid":"https://orcid.org/0000-0001-5331-6058"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohit Prabhavalkar","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.595,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":16,"citation_normalized_percentile":{"value":0.999754,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9955,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reprogramming","display_name":"Reprogramming","score":0.54046226}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7219776},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.54451424},{"id":"https://openalex.org/C77255625","wikidata":"https://www.wikidata.org/wiki/Q402093","display_name":"Reprogramming","level":3,"score":0.54046226},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49980998},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45976156},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06557664},{"id":"https://openalex.org/C1491633281","wikidata":"https://www.wikidata.org/wiki/Q7868","display_name":"Cell","level":2,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094903","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2301.07851","pdf_url":"http://arxiv.org/pdf/2301.07851","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.07851","pdf_url":"https://arxiv.org/pdf/2301.07851","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094903","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.78,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":36,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2033310064","https://openalex.org/W2550553598","https://openalex.org/W2633221078","https://openalex.org/W2896457183","https://openalex.org/W2928941594","https://openalex.org/W2952809536","https://openalex.org/W2962760690","https://openalex.org/W2962824709","https://openalex.org/W2963211188","https://openalex.org/W2964172053","https://openalex.org/W2964303773","https://openalex.org/W2970925270","https://openalex.org/W2971840980","https://openalex.org/W2972741966","https://openalex.org/W2995181338","https://openalex.org/W3015927303","https://openalex.org/W3036601975","https://openalex.org/W3095410713","https://openalex.org/W3097777922","https://openalex.org/W3125118953","https://openalex.org/W3163600291","https://openalex.org/W3172443934","https://openalex.org/W3174784402","https://openalex.org/W3176828726","https://openalex.org/W3198429080","https://openalex.org/W3201225328","https://openalex.org/W3204696009","https://openalex.org/W3212799896","https://openalex.org/W4210463634","https://openalex.org/W4210811812","https://openalex.org/W4225295099","https://openalex.org/W4226033575","https://openalex.org/W4226162428","https://openalex.org/W4293793697","https://openalex.org/W4298312696"],"related_works":["https://openalex.org/W3204019825","https://openalex.org/W3014941007","https://openalex.org/W2972394710","https://openalex.org/W2748952813","https://openalex.org/W2395945797","https://openalex.org/W2368295610","https://openalex.org/W2129470469","https://openalex.org/W1631979401","https://openalex.org/W1543102035","https://openalex.org/W1497716427"],"abstract_inverted_index":{"In":[0,127],"this":[1],"work,":[2],"we":[3,57,129],"propose":[4],"a":[5,67,71,77,107,116],"new":[6],"parameter-efficient":[7],"learning":[8],"framework":[9],"based":[10],"on":[11,41,54,76],"neural":[12,38],"model":[13,52,86,110],"reprogramming":[14,53,87],"for":[15,47],"cross-lingual":[16],"speech":[17,25,81,144],"recognition,":[18],"which":[19],"can":[20],"re-purpose":[21],"well-trained":[22],"English":[23],"automatic":[24],"recognition":[26],"(ASR)":[27],"models":[28],"to":[29,60,95,111,120,133],"recognize":[30],"the":[31,48],"other":[32],"languages.":[33,126],"We":[34],"design":[35],"different":[36,125,131],"auxiliary":[37],"architectures":[39,152],"focusing":[40],"learnable":[42],"pre-trained":[43,73,136],"feature":[44],"enhancement":[45],"that,":[46],"first":[49],"time,":[50],"empowers":[51],"ASR.":[55],"Specifically,":[56],"investigate":[58],"how":[59],"select":[61],"trainable":[62,104],"components":[63],"(i.e.,":[64],"encoder)":[65],"of":[66,93,99,101,118,163],"conformer-based":[68],"RNN-Transducer,":[69],"as":[70],"frozen":[72],"backbone.":[74],"Experiments":[75],"seven-language":[78],"multilingual":[79,143],"LibriSpeech":[80],"(MLS)":[82],"task":[83],"show":[84],"that":[85],"only":[88],"requires":[89],"4.2%":[90],"(11M":[91],"out":[92,98],"270M)":[94],"6.8%":[96],"(45M":[97],"660M)":[100],"its":[102],"original":[103],"parameters":[105],"from":[106],"full":[108],"ASR":[109,137,150],"perform":[112],"competitive":[113],"results":[114],"in":[115,139,161],"range":[117],"11.9%":[119],"8.1%":[121],"WER":[122,165],"averaged":[123],"across":[124],"addition,":[128],"discover":[130],"setups":[132],"make":[134],"large-scale":[135],"succeed":[138],"both":[140],"monolingual":[141],"and":[142,153,166],"recognition.":[145],"Our":[146],"methods":[147],"outperform":[148],"existing":[149],"tuning":[151],"their":[154],"extension":[155],"with":[156],"self-supervised":[157],"losses":[158],"(e.g.,":[159],"w2v-bert)":[160],"terms":[162],"lower":[164],"better":[167],"training":[168],"efficiency.":[169]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4372346241","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":12}],"updated_date":"2024-12-12T10:49:22.474926","created_date":"2023-05-07"}