{"id":"https://openalex.org/W3206645560","doi":"https://doi.org/10.1109/taslp.2022.3172632","title":"Towards Robust Waveform-Based Acoustic Models","display_name":"Towards Robust Waveform-Based Acoustic Models","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W3206645560","doi":"https://doi.org/10.1109/taslp.2022.3172632","mag":"3206645560"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3172632","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.08634","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061934775","display_name":"Dino Ogli\u0107","orcid":"https://orcid.org/0000-0002-4728-9644"},"institutions":[{"id":"https://openalex.org/I105036370","display_name":"AstraZeneca (United Kingdom)","ror":"https://ror.org/04r9x1a08","country_code":"GB","type":"company","lineage":["https://openalex.org/I105036370"]},{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dino Oglic","raw_affiliation_strings":["Applied Analytics and AI, Data Sciences and AI, BioPharmaceuticals R&D, AstraZeneca, Cambridge, U.K.","Department of Engineering, King's College London, London, U.K."],"affiliations":[{"raw_affiliation_string":"Applied Analytics and AI, Data Sciences and AI, BioPharmaceuticals R&D, AstraZeneca, Cambridge, U.K.","institution_ids":["https://openalex.org/I105036370"]},{"raw_affiliation_string":"Department of Engineering, King's College London, London, U.K.","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056805951","display_name":"Zoran Cvetkovi\u0107","orcid":"https://orcid.org/0000-0002-5128-5099"},"institutions":[{"id":"https://openalex.org/I4210119896","display_name":"King's College School","ror":"https://ror.org/02bbqcn27","country_code":"GB","type":"education","lineage":["https://openalex.org/I4210119896"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zoran Cvetkovic","raw_affiliation_strings":["Department of Engineering, King's College London, London, U.K."],"affiliations":[{"raw_affiliation_string":"Department of Engineering, King's College London, London, U.K.","institution_ids":["https://openalex.org/I4210119896"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069770227","display_name":"Peter Sollich","orcid":"https://orcid.org/0000-0003-0169-7893"},"institutions":[],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Sollich","raw_affiliation_strings":["Department of Mathematics, King's College London, London, U.K."],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, King's College London, London, U.K.","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027442277","display_name":"Steve Renals","orcid":"https://orcid.org/0000-0002-8790-3389"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Steve Renals","raw_affiliation_strings":["Center for Speech Technology Research, University of Edinburgh, Edinburgh, U.K."],"affiliations":[{"raw_affiliation_string":"Center for Speech Technology Research, University of Edinburgh, Edinburgh, U.K.","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015318204","display_name":"Bin Yu","orcid":"https://orcid.org/0000-0002-7461-625X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bin Yu","raw_affiliation_strings":["Departments of Statistics and Electrical Engineering and Computer Sciences, UC Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Departments of Statistics and Electrical Engineering and Computer Sciences, UC Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.324,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.418212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":70,"max":76},"biblio":{"volume":"30","issue":null,"first_page":"1977","last_page":"1992"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9963,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6911111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43173206},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32361072},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11877096},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3172632","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.08634","pdf_url":"https://arxiv.org/pdf/2110.08634","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2110.08634","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.08634","pdf_url":"https://arxiv.org/pdf/2110.08634","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.76,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[{"funder":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council","award_id":"EP/R012067/1"}],"datasets":[],"versions":["https://openalex.org/W3206645560"],"referenced_works_count":61,"referenced_works":["https://openalex.org/W1489048302","https://openalex.org/W1524333225","https://openalex.org/W1533861849","https://openalex.org/W1568183767","https://openalex.org/W1922655562","https://openalex.org/W1973669708","https://openalex.org/W1979651826","https://openalex.org/W1992475611","https://openalex.org/W2032558547","https://openalex.org/W2085267254","https://openalex.org/W2099621636","https://openalex.org/W2114973313","https://openalex.org/W2116217121","https://openalex.org/W2118020555","https://openalex.org/W2131548063","https://openalex.org/W2133815075","https://openalex.org/W2136439176","https://openalex.org/W2137295153","https://openalex.org/W2139698650","https://openalex.org/W2144068644","https://openalex.org/W2144908784","https://openalex.org/W2151239833","https://openalex.org/W2153773386","https://openalex.org/W2184343439","https://openalex.org/W2398826216","https://openalex.org/W2407080277","https://openalex.org/W2515753980","https://openalex.org/W2559260703","https://openalex.org/W2696967604","https://openalex.org/W2763188033","https://openalex.org/W2789942385","https://openalex.org/W2796892552","https://openalex.org/W2903799412","https://openalex.org/W2911634294","https://openalex.org/W2936774411","https://openalex.org/W2950048339","https://openalex.org/W2951735139","https://openalex.org/W2963403868","https://openalex.org/W2964012862","https://openalex.org/W2964052309","https://openalex.org/W2964138484","https://openalex.org/W2971109239","https://openalex.org/W2973049979","https://openalex.org/W2973053574","https://openalex.org/W2982427813","https://openalex.org/W2982456909","https://openalex.org/W2999905431","https://openalex.org/W3016042429","https://openalex.org/W3036601975","https://openalex.org/W3094855746","https://openalex.org/W3095947083","https://openalex.org/W3097919147","https://openalex.org/W3099782249","https://openalex.org/W3107298252","https://openalex.org/W3123517045","https://openalex.org/W3142067363","https://openalex.org/W3195185666","https://openalex.org/W4233141322","https://openalex.org/W4249820770","https://openalex.org/W4385245566","https://openalex.org/W4394666973"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4367555392","https://openalex.org/W3040712279","https://openalex.org/W2748952813","https://openalex.org/W2374664672","https://openalex.org/W2364769705","https://openalex.org/W2176409448","https://openalex.org/W2129841057","https://openalex.org/W2056136368","https://openalex.org/W1974895211"],"abstract_inverted_index":{"We":[0,126],"study":[1],"the":[2,29,66,71,75,82,87,90,123,129,177,185,204,210,226],"problem":[3,23],"of":[4,25,31,52,81,89,110,143,230],"learning":[5,124],"robust":[6,119],"acoustic":[7,147,227],"models":[8,217],"in":[9,40,86,146,197],"adverse":[10],"environments,":[11],"characterized":[12],"by":[13,64],"a":[14,108,220],"significant":[15],"mismatch":[16],"between":[17],"training":[18,63,91,102,202,221],"and":[19,112,171],"test":[20,231],"conditions.":[21],"This":[22],"is":[24],"paramount":[26],"importance":[27],"for":[28],"deployment":[30],"speech":[32],"recognition":[33],"systems":[34],"that":[35,69,97,115,184],"need":[36],"to":[37,139,157,189,201,216,224],"perform":[38],"well":[39],"unseen":[41,190],"environments.":[42],"First,":[43],"we":[44,95,174],"characterize":[45],"data":[46,135],"augmentation":[47,136],"theoretically":[48,114],"as":[49],"an":[50,79],"instance":[51],"vicinal":[53],"risk":[54,60,206],"minimization,":[55],"which":[56,160],"aims":[57],"at":[58,101],"improving":[59],"estimates":[61],"during":[62],"replacing":[65],"delta":[67],"functions":[68],"define":[70],"empirical":[72,181],"density":[73,85],"over":[74],"input":[76],"space":[77],"with":[78,164,193],"approximation":[80],"marginal":[83],"population":[84],"vicinity":[88],"samples.":[92],"More":[93],"specifically,":[94],"assume":[96],"local":[98],"neighborhoods":[99],"centered":[100],"samples":[103],"can":[104,117,187],"be":[105],"approximated":[106],"using":[107,203,219],"mixture":[109,131],"Gaussians,":[111],"demonstrate":[113,212],"this":[116],"incorporate":[118],"inductive":[120],"bias":[121],"into":[122],"process.":[125],"then":[127],"specify":[128],"individual":[130],"components":[132],"implicitly":[133],"via":[134],"schemes,":[137],"designed":[138,223],"address":[140],"common":[141],"sources":[142],"spurious":[144],"correlations":[145],"models.":[148],"To":[149],"avoid":[150],"potential":[151],"confounding":[152],"effects":[153],"on":[154,176],"robustness":[155],"due":[156],"information":[158],"loss,":[159],"has":[161],"been":[162],"associated":[163],"standard":[165,205],"feature":[166],"extraction":[167],"techniques":[168],"(e.g.,":[169],"FBANK":[170],"MFCC":[172],"features),":[173],"focus":[175],"waveform-based":[178],"setting.":[179],"Our":[180],"results":[182,211],"show":[183],"approach":[186],"generalize":[188],"noise":[191],"conditions,":[192],"150%":[194],"relative":[195,215],"improvement":[196],"out-of-distribution":[198],"generalization":[199],"compared":[200],"minimization":[207],"principle.":[208],"Moreover,":[209],"competitive":[213],"performance":[214],"learned":[218],"sample":[222],"match":[225],"conditions":[228],"characteristic":[229],"utterances.":[232]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3206645560","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2024-12-12T23:55:41.532495","created_date":"2021-10-25"}