{"id":"https://openalex.org/W4375850650","doi":"https://doi.org/10.1109/icassp49357.2023.10096799","title":"Resource-Efficient Transfer Learning from Speech Foundation Model Using Hierarchical Feature Fusion","display_name":"Resource-Efficient Transfer Learning from Speech Foundation Model Using Hierarchical Feature Fusion","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375850650","doi":"https://doi.org/10.1109/icassp49357.2023.10096799"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096799","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp49357.2023.10096799","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069872293","display_name":"Zhouyuan Huo","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhouyuan Huo","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032798707","display_name":"Khe Chai Sim","orcid":"https://orcid.org/0000-0002-0866-2223"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khe Chai Sim","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374448","display_name":"Bo Li","orcid":"https://orcid.org/0000-0002-6711-3603"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014088530","display_name":"Dongseong Hwang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongseong Hwang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.824,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.558059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.57649565},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.49986005},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.4299136}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84368575},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.57649565},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.57056713},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5038015},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.49986005},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48162884},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4610746},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.45608044},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4472712},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.4299136},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34996718},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08564249},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096799","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.02712","pdf_url":"https://arxiv.org/pdf/2211.02712","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096799","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":26,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W2936774411","https://openalex.org/W2963122170","https://openalex.org/W2964303773","https://openalex.org/W2965373594","https://openalex.org/W2973049979","https://openalex.org/W3003875258","https://openalex.org/W3016011332","https://openalex.org/W3036601975","https://openalex.org/W3097777922","https://openalex.org/W3166396011","https://openalex.org/W3168867926","https://openalex.org/W3176828726","https://openalex.org/W3195577433","https://openalex.org/W3202037040","https://openalex.org/W3202079424","https://openalex.org/W3204696009","https://openalex.org/W3209059054","https://openalex.org/W4221161761","https://openalex.org/W4226033575","https://openalex.org/W4226380987","https://openalex.org/W4287122891","https://openalex.org/W4288089799","https://openalex.org/W4292779060","https://openalex.org/W4297808394","https://openalex.org/W4297841583"],"related_works":["https://openalex.org/W4375928479","https://openalex.org/W4281381188","https://openalex.org/W4206357785","https://openalex.org/W3198847674","https://openalex.org/W3192840557","https://openalex.org/W3167935049","https://openalex.org/W3131673289","https://openalex.org/W3096913503","https://openalex.org/W3023427754","https://openalex.org/W2951211570"],"abstract_inverted_index":{"Self-supervised":[0],"pre-training":[1],"of":[2,56,102,107,153],"a":[3,53,74,108,118],"speech":[4,18,113,129,144],"foundation":[5,24,34,63,109,130],"model,":[6],"followed":[7],"by":[8],"supervised":[9],"fine-tuning,":[10],"has":[11],"shown":[12],"impressive":[13],"quality":[14],"improvements":[15],"on":[16,111,143],"automatic":[17],"recognition":[19,114,145],"(ASR)":[20],"tasks.":[21,67],"Fine-tuning":[22],"separate":[23],"models":[25],"for":[26,124],"many":[27],"downstream":[28],"tasks":[29],"are":[30,58],"expensive":[31],"since":[32],"the":[33,62,88,96,100,112,136,171,176,181],"model":[35,64,110,183],"is":[36],"usually":[37],"very":[38],"big.":[39],"Parameter-efficient":[40],"fine-tuning":[41,180],"methods":[42,70],"(e.g.":[43],"adapter,":[44],"sparse":[45],"update":[46],"methods)":[47],"offer":[48],"an":[49],"alternative":[50],"paradigm":[51],"where":[52],"small":[54],"set":[55],"parameters":[57,189],"updated":[59],"to":[60,65],"adapt":[61],"new":[66],"However,":[68],"these":[69],"still":[71],"suffer":[72],"from":[73,128],"high":[75],"computational":[76,157],"memory":[77,158],"cost":[78,159],"and":[79,116,160,190],"slow":[80],"training":[81,162,193],"speed":[82],"because":[83],"they":[84],"require":[85],"backpropagation":[86],"through":[87],"entire":[89],"neural":[90],"network":[91],"at":[92,104,168],"each":[93],"step.":[94],"In":[95],"paper,":[97],"we":[98],"analyze":[99],"performance":[101,142,178],"features":[103],"different":[105],"layers":[106],"task":[115,146],"propose":[117],"novel":[119],"hierarchical":[120],"feature":[121],"fusion":[122],"method":[123,138,173],"resource-efficient":[125],"transfer":[126],"learning":[127],"models.":[131],"Experimental":[132],"results":[133],"show":[134],"that":[135],"proposed":[137,172],"can":[139,174],"achieve":[140,175],"better":[141],"than":[147],"existing":[148],"algorithms":[149],"with":[150,166,184],"fewer":[151,186],"number":[152],"trainable":[154,187],"parameters,":[155],"less":[156],"faster":[161,192],"speed.":[163,194],"After":[164],"combining":[165],"Adapters":[167],"all":[169],"layers,":[170],"same":[177],"as":[179],"whole":[182],"97%":[185],"encoder":[188],"53%":[191]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4375850650","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-02T21:16:02.952810","created_date":"2023-05-10"}