{"id":"https://openalex.org/W4392903673","doi":"https://doi.org/10.1109/icassp48485.2024.10447627","title":"Towards Automatic Data Augmentation for Disordered Speech Recognition","display_name":"Towards Automatic Data Augmentation for Disordered Speech Recognition","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903673","doi":"https://doi.org/10.1109/icassp48485.2024.10447627"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447627","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp48485.2024.10447627","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075023049","display_name":"Zengrui Jin","orcid":"https://orcid.org/0000-0002-2637-7880"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zengrui Jin","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035857878","display_name":"Xurong Xie","orcid":"https://orcid.org/0000-0002-6714-6296"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xurong Xie","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407750","display_name":"Tianzi Wang","orcid":"https://orcid.org/0009-0005-5823-3039"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianzi Wang","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003895235","display_name":"Mengzhe Geng","orcid":"https://orcid.org/0000-0002-7886-439X"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengzhe Geng","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106406454","display_name":"Jiajun Deng","orcid":"https://orcid.org/0000-0001-8874-4167"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiajun Deng","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042515126","display_name":"Guinan Li","orcid":"https://orcid.org/0000-0002-2206-0237"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guinan Li","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026334377","display_name":"Shujie Hu","orcid":"https://orcid.org/0000-0002-8475-4912"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shujie Hu","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037109470","display_name":"Xunying Liu","orcid":"https://orcid.org/0000-0001-6725-1160"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xunying Liu","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"10626","last_page":"10630"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.998,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.601916},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.49575767}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80563533},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7013367},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6080455},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.601916},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5377557},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.50749606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5029053},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.49575767},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4423883},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.4203679},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36885983},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06989077},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447627","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.08641","pdf_url":"https://arxiv.org/pdf/2312.08641","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447627","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.5,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":null},{"funder":"https://openalex.org/F4320335892","funder_display_name":"Youth Innovation Promotion Association","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":41,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W180052447","https://openalex.org/W1932968309","https://openalex.org/W1979651826","https://openalex.org/W1989674786","https://openalex.org/W2166637769","https://openalex.org/W2168510624","https://openalex.org/W2250686550","https://openalex.org/W2396944218","https://openalex.org/W2401277329","https://openalex.org/W2407080277","https://openalex.org/W2696967604","https://openalex.org/W2883586237","https://openalex.org/W2888789389","https://openalex.org/W2888807255","https://openalex.org/W2936774411","https://openalex.org/W2936861580","https://openalex.org/W2949736877","https://openalex.org/W2962780374","https://openalex.org/W3014690389","https://openalex.org/W3015995734","https://openalex.org/W3035682985","https://openalex.org/W3047916742","https://openalex.org/W3095123370","https://openalex.org/W3097798370","https://openalex.org/W3134043075","https://openalex.org/W3160259745","https://openalex.org/W3163725792","https://openalex.org/W3174329270","https://openalex.org/W3196511136","https://openalex.org/W3198806843","https://openalex.org/W3204494926","https://openalex.org/W4221148457","https://openalex.org/W4226074601","https://openalex.org/W4230563027","https://openalex.org/W4312120835","https://openalex.org/W4319862247","https://openalex.org/W4372341972","https://openalex.org/W4375869369","https://openalex.org/W4385822966","https://openalex.org/W4385823199"],"related_works":["https://openalex.org/W4225124612","https://openalex.org/W3019226033","https://openalex.org/W2991483587","https://openalex.org/W2786391746","https://openalex.org/W2187490799","https://openalex.org/W2043806667","https://openalex.org/W2021633306","https://openalex.org/W2006801911","https://openalex.org/W1999699871","https://openalex.org/W1990237101"],"abstract_inverted_index":{"Automatic":[0],"recognition":[1],"of":[2,67,122,129],"disordered":[3],"speech":[4],"remains":[5],"a":[6,19],"highly":[7],"challenging":[8],"task":[9,55],"to":[10,13,103],"date":[11],"due":[12],"data":[14,25,95],"scarcity.":[15],"This":[16],"paper":[17],"presents":[18],"reinforcement":[20],"learning":[21],"(RL)":[22],"based":[23],"on-the-fly":[24],"augmentation":[26,96],"approach":[27],"for":[28],"training":[29],"state-of-the-art":[30],"PyChain":[31,115],"TDNN":[32,116],"and":[33,44,56,64,79],"end-to-end":[34],"Conformer":[35],"ASR":[36,83],"systems":[37],"on":[38,87,124],"such":[39],"data.":[40],"The":[41],"handcrafted":[42,109],"temporal":[43],"spectral":[45],"mask":[46],"operations":[47],"in":[48],"the":[49,88,92,125],"standard":[50],"SpecAugment":[51,110],"method":[52],"that":[53,104],"are":[54,70],"system":[57,84,117],"dependent,":[58],"together":[59],"with":[60,82],"additionally":[61],"introduced":[62],"minimum":[63],"maximum":[65],"cut-offs":[66],"these":[68],"masks,":[69],"now":[71],"automatically":[72],"learned":[73],"using":[74,106],"an":[75,119],"RNN-based":[76],"policy":[77],"controller":[78],"tightly":[80],"integrated":[81],"training.":[85],"Experiments":[86],"UASpeech":[89,126],"corpus":[90],"suggest":[91],"proposed":[93],"RL-based":[94],"consistently":[97],"produced":[98,118],"performance":[99],"superior":[100],"or":[101,108],"comparable":[102],"obtained":[105],"expert":[107],"policies.":[111],"Our":[112],"RL":[113],"auto-augmented":[114],"overall":[120],"WER":[121],"28.79%":[123],"test":[127],"set":[128],"16":[130],"dysarthric":[131],"speakers.":[132]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4392903673","counts_by_year":[],"updated_date":"2024-12-14T10:18:27.637103","created_date":"2024-03-19"}