{"id":"https://openalex.org/W4392903331","doi":"https://doi.org/10.1109/icassp48485.2024.10448217","title":"USM-Lite: Quantization and Sparsity Aware Fine-Tuning for Speech Recognition with Universal Speech Models","display_name":"USM-Lite: Quantization and Sparsity Aware Fine-Tuning for Speech Recognition with Universal Speech Models","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903331","doi":"https://doi.org/10.1109/icassp48485.2024.10448217"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448217","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp48485.2024.10448217","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058886181","display_name":"Shaojin Ding","orcid":"https://orcid.org/0000-0002-2108-3111"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaojin Ding","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079902855","display_name":"David Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Qiu","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053114931","display_name":"David Rim","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Rim","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319167","display_name":"Yanzhang He","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhang He","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103149673","display_name":"Oleg Rybakov","orcid":"https://orcid.org/0009-0007-0021-047X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oleg Rybakov","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374448","display_name":"Bo Li","orcid":"https://orcid.org/0000-0002-6711-3603"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032640894","display_name":"Rohit Prabhavalkar","orcid":"https://orcid.org/0000-0001-5331-6058"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohit Prabhavalkar","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101432591","display_name":"Weiran Wang","orcid":"https://orcid.org/0009-0000-0843-707X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiran Wang","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043621779","display_name":"Zhonglin Han","orcid":"https://orcid.org/0000-0001-8795-7953"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhonglin Han","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100402624","display_name":"Jian Li","orcid":"https://orcid.org/0000-0003-4977-1802"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Jian Li","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070172290","display_name":"Amir Yazdanbakhsh","orcid":"https://orcid.org/0000-0001-8199-7671"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Amir Yazdanbakhsh","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033596864","display_name":"Shivani Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shivani Agrawal","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":null,"issue":null,"first_page":"10756","last_page":"10760"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4256367}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77594984},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7420703},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.55272055},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4258039},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4256367},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36523426}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448217","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.08553","pdf_url":"https://arxiv.org/pdf/2312.08553","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448217","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":35,"referenced_works":["https://openalex.org/W1922655562","https://openalex.org/W2108677974","https://openalex.org/W2127141656","https://openalex.org/W2144499799","https://openalex.org/W2242818861","https://openalex.org/W2327501763","https://openalex.org/W2526425061","https://openalex.org/W2748778702","https://openalex.org/W2916954108","https://openalex.org/W2962760690","https://openalex.org/W2962824709","https://openalex.org/W2963122961","https://openalex.org/W2964110616","https://openalex.org/W2964539095","https://openalex.org/W2973049979","https://openalex.org/W2975044525","https://openalex.org/W3007625080","https://openalex.org/W3036601975","https://openalex.org/W3081179222","https://openalex.org/W3095311338","https://openalex.org/W3097777922","https://openalex.org/W3127067080","https://openalex.org/W3196364802","https://openalex.org/W3209059054","https://openalex.org/W4221138270","https://openalex.org/W4225529283","https://openalex.org/W4287121455","https://openalex.org/W4297841557","https://openalex.org/W4323066695","https://openalex.org/W4378105483","https://openalex.org/W4378498746","https://openalex.org/W4385245566","https://openalex.org/W4385823023","https://openalex.org/W4385823026","https://openalex.org/W4385823182"],"related_works":["https://openalex.org/W2547793174","https://openalex.org/W2544241817","https://openalex.org/W2405439032","https://openalex.org/W2309273277","https://openalex.org/W2132658536","https://openalex.org/W2120730869","https://openalex.org/W2070212102","https://openalex.org/W2061937230","https://openalex.org/W1911859126","https://openalex.org/W1574295218"],"abstract_inverted_index":{"End-to-end":[0],"automatic":[1],"speech":[2,18],"recognition":[3],"(ASR)":[4],"models":[5,19],"have":[6,157],"seen":[7],"revolutionary":[8],"quality":[9],"gains":[10],"with":[11,66,95],"the":[12,31,77,81,117,135,154,160,163,180,185],"recent":[13],"development":[14],"of":[15,113,119,159,165],"large-scale":[16,102],"universal":[17],"(USM).":[20],"However,":[21,127],"deploying":[22],"these":[23],"massive":[24],"USMs":[25],"is":[26,41],"extremely":[27],"expensive":[28],"due":[29],"to":[30,46,106,121,133,156],"enormous":[32],"memory":[33],"usage":[34],"and":[35,70,87,124,144,182,187],"computational":[36],"cost.":[37],"Therefore,":[38],"model":[39,78,82,155],"compression":[40,130],"an":[42],"important":[43],"research":[44],"topic":[45],"fit":[47],"USM-based":[48],"ASR":[49],"under":[50,138],"budget":[51],"in":[52],"real-world":[53],"scenarios.":[54],"In":[55],"this":[56],"study,":[57],"we":[58],"propose":[59],"a":[60,67,96,101,128],"USM":[61,99],"fine-tuning":[62],"approach":[63],"for":[64,194],"ASR,":[65],"low-bit":[68],"quantization":[69,123,143],"N:M":[71],"structured":[72],"sparsity":[73],"aware":[74],"paradigm":[75],"on":[76,100,179,184],"weights,":[79],"reducing":[80],"complexity":[83],"from":[84],"parameter":[85,98],"precision":[86],"matrix":[88],"topology":[89],"perspectives.":[90],"We":[91,174],"conducted":[92],"extensive":[93],"experiments":[94],"2-billion":[97],"voice":[103],"search":[104],"dataset":[105],"evaluate":[107],"our":[108,149],"proposed":[109,150],"method.":[110],"A":[111],"series":[112],"ablation":[114],"studies":[115],"validate":[116],"effectiveness":[118],"up":[120],"int4":[122],"2:4":[125],"sparsity.":[126,146],"single":[129],"technique":[131],"fails":[132],"recover":[134],"performance":[136],"well":[137],"extreme":[139],"setups":[140],"including":[141],"int2":[142],"1:4":[145],"By":[147],"contrast,":[148],"method":[151],"can":[152],"compress":[153],"9.4%":[158],"size,":[161],"at":[162],"cost":[164],"only":[166],"7.3%":[167],"relative":[168],"word":[169],"error":[170],"rate":[171],"(WER)":[172],"regressions.":[173],"also":[175],"provided":[176],"in-depth":[177],"analyses":[178],"results":[181],"discussions":[183],"limitations":[186],"potential":[188],"solutions,":[189],"which":[190],"would":[191],"be":[192],"valuable":[193],"future":[195],"studies.":[196]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392903331","counts_by_year":[],"updated_date":"2025-01-17T13:40:55.904944","created_date":"2024-03-19"}