{"id":"https://openalex.org/W3195185666","doi":"https://doi.org/10.1109/taslp.2021.3104193","title":"Learning Waveform-Based Acoustic Models Using Deep Variational Convolutional Neural Networks","display_name":"Learning Waveform-Based Acoustic Models Using Deep Variational Convolutional Neural Networks","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3195185666","doi":"https://doi.org/10.1109/taslp.2021.3104193","mag":"3195185666"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3104193","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/1906.09526","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061934775","display_name":"Dino Ogli\u0107","orcid":"https://orcid.org/0000-0002-4728-9644"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"funder","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dino Oglic","raw_affiliation_strings":["Department of Engineering, King\u2019s College London, London, U.K."],"affiliations":[{"raw_affiliation_string":"Department of Engineering, King\u2019s College London, London, U.K.","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056805951","display_name":"Zoran Cvetkovi\u0107","orcid":"https://orcid.org/0000-0002-5128-5099"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"funder","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zoran Cvetkovic","raw_affiliation_strings":["Department of Engineering, King\u2019s College London, London, U.K."],"affiliations":[{"raw_affiliation_string":"Department of Engineering, King\u2019s College London, London, U.K.","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069770227","display_name":"Peter Sollich","orcid":"https://orcid.org/0000-0003-0169-7893"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"funder","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]},{"id":"https://openalex.org/I74656192","display_name":"University of G\u00f6ttingen","ror":"https://ror.org/01y9bpm73","country_code":"DE","type":"funder","lineage":["https://openalex.org/I74656192"]}],"countries":["DE","GB"],"is_corresponding":false,"raw_author_name":"Peter Sollich","raw_affiliation_strings":["Department of Mathematics, King\u2019s College London, London, U.K.","Institute for Theoretical Physics, University of G\u00f6ttingen, G\u00f6ttingen, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, King\u2019s College London, London, U.K.","institution_ids":["https://openalex.org/I183935753"]},{"raw_affiliation_string":"Institute for Theoretical Physics, University of G\u00f6ttingen, G\u00f6ttingen, Germany","institution_ids":["https://openalex.org/I74656192"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.997,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":9,"citation_normalized_percentile":{"value":0.857047,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":85,"max":86},"biblio":{"volume":"29","issue":null,"first_page":"2850","last_page":"2863"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness","score":0.5010941}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6892754},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5783277},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5128198},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.50693804},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5010941},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.4964643},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48731622},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3999309},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3606469},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21086594},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3104193","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.09526","pdf_url":"http://arxiv.org/pdf/1906.09526","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1906.09526","pdf_url":"https://arxiv.org/pdf/1906.09526","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.1906.09526","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.09526","pdf_url":"http://arxiv.org/pdf/1906.09526","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council","award_id":"EP/R012067/1"}],"datasets":[],"versions":["https://openalex.org/W3191151211","https://openalex.org/W3195185666"],"referenced_works_count":114,"referenced_works":["https://openalex.org/W145385712","https://openalex.org/W1494192115","https://openalex.org/W1502922572","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1542280630","https://openalex.org/W1565746575","https://openalex.org/W1568183767","https://openalex.org/W1590183771","https://openalex.org/W1724524793","https://openalex.org/W1746819321","https://openalex.org/W1826234144","https://openalex.org/W1904365287","https://openalex.org/W1959608418","https://openalex.org/W1971947347","https://openalex.org/W1994906459","https://openalex.org/W1998563636","https://openalex.org/W1999974018","https://openalex.org/W2001414552","https://openalex.org/W2002342963","https://openalex.org/W2007069447","https://openalex.org/W2018586823","https://openalex.org/W2032558547","https://openalex.org/W2062989416","https://openalex.org/W2078279533","https://openalex.org/W2089624763","https://openalex.org/W2093231248","https://openalex.org/W2094438648","https://openalex.org/W2095705004","https://openalex.org/W2108677974","https://openalex.org/W2116217121","https://openalex.org/W2118020555","https://openalex.org/W2123838014","https://openalex.org/W2131548063","https://openalex.org/W2133815075","https://openalex.org/W2142416747","https://openalex.org/W2144068644","https://openalex.org/W2144908784","https://openalex.org/W2148154194","https://openalex.org/W2154833897","https://openalex.org/W2164411961","https://openalex.org/W2167270514","https://openalex.org/W2261689926","https://openalex.org/W2331927446","https://openalex.org/W2335317505","https://openalex.org/W2398826216","https://openalex.org/W2402146185","https://openalex.org/W2408093180","https://openalex.org/W2508048623","https://openalex.org/W2514741789","https://openalex.org/W2515753980","https://openalex.org/W2557283755","https://openalex.org/W2579029485","https://openalex.org/W2582199702","https://openalex.org/W2582745083","https://openalex.org/W2587210085","https://openalex.org/W2727300753","https://openalex.org/W2785397276","https://openalex.org/W2794209590","https://openalex.org/W2796892552","https://openalex.org/W2799958557","https://openalex.org/W2801179766","https://openalex.org/W2803125181","https://openalex.org/W2888909726","https://openalex.org/W2891874693","https://openalex.org/W2903382683","https://openalex.org/W2903799412","https://openalex.org/W2904818793","https://openalex.org/W2936481169","https://openalex.org/W2937814805","https://openalex.org/W2939173691","https://openalex.org/W2939776061","https://openalex.org/W2942544643","https://openalex.org/W2949117887","https://openalex.org/W2949382160","https://openalex.org/W2950604486","https://openalex.org/W2951266961","https://openalex.org/W2951595529","https://openalex.org/W2952088488","https://openalex.org/W2955174253","https://openalex.org/W2962901777","https://openalex.org/W2963071736","https://openalex.org/W2963135265","https://openalex.org/W2963175699","https://openalex.org/W2963669405","https://openalex.org/W2964094335","https://openalex.org/W2964121744","https://openalex.org/W2964227577","https://openalex.org/W2973053574","https://openalex.org/W2982427813","https://openalex.org/W2996261834","https://openalex.org/W2996383576","https://openalex.org/W3006835112","https://openalex.org/W3016005719","https://openalex.org/W3016042429","https://openalex.org/W3037932933","https://openalex.org/W3094855746","https://openalex.org/W3095947083","https://openalex.org/W3104896896","https://openalex.org/W3106050203","https://openalex.org/W35527955","https://openalex.org/W394609054","https://openalex.org/W4205130185","https://openalex.org/W4211049957","https://openalex.org/W4239353198","https://openalex.org/W4246858143","https://openalex.org/W4251742697","https://openalex.org/W4252684946","https://openalex.org/W4288347168","https://openalex.org/W4295177495","https://openalex.org/W4300223101","https://openalex.org/W4301866048","https://openalex.org/W4302613435","https://openalex.org/W4394666973"],"related_works":["https://openalex.org/W4375867731","https://openalex.org/W4312417841","https://openalex.org/W4293226380","https://openalex.org/W4226493464","https://openalex.org/W3193565141","https://openalex.org/W3167935049","https://openalex.org/W3133861977","https://openalex.org/W3103566983","https://openalex.org/W3029198973","https://openalex.org/W2951211570"],"abstract_inverted_index":{"We":[0,73,130],"investigate":[1],"the":[2,55,70,120,127,137,143,158,174,185,201],"potential":[3],"of":[4,30,61,64,100,136,152,184,213],"stochastic":[5,146],"neural":[6,52,78,139,209],"networks":[7],"for":[8,163,165,211],"learning":[9,71,212],"effective":[10,170],"waveform-based":[11,15,190],"acoustic":[12,215],"models.":[13],"The":[14,104],"setting,":[16],"inherent":[17],"to":[18,112,198],"fully":[19],"end-to-end":[20],"speech":[21,34,83],"recognition":[22,35],"systems,":[23],"is":[24],"motivated":[25],"by":[26,97,126],"several":[27],"comparative":[28],"studies":[29],"automatic":[31],"and":[32,141,192],"human":[33],"that":[36,80,194],"associate":[37],"standard":[38,108,218],"non-adaptive":[39],"feature":[40],"extraction":[41],"techniques":[42],"with":[43,217],"information":[44],"loss,":[45],"which":[46,166],"can":[47],"adversely":[48],"affect":[49],"robustness.":[50,199],"Stochastic":[51],"networks,":[53],"on":[54,132,173],"other":[56],"hand,":[57],"are":[58,95],"a":[59,75,133,181,204],"class":[60],"models":[62,216],"capable":[63],"incorporating":[65],"rich":[66],"regularization":[67],"mechanisms":[68],"into":[69,84],"process.":[72],"consider":[74],"deep":[76,207],"convolutional":[77,91,208],"network":[79,105,210],"first":[81],"decomposes":[82],"frequency":[85],"sub-bands":[86],"via":[87],"an":[88,153,169],"adaptive":[89],"parametric":[90,128],"block":[92],"where":[93],"filters":[94],"specified":[96],"cosine":[98],"modulations":[99],"compactly":[101],"supported":[102],"windows.":[103],"then":[106],"employs":[107],"non-parametric":[109],"1D":[110],"convolutions":[111],"extract":[113],"relevant":[114],"spectro-temporal":[115],"patterns":[116],"while":[117],"gradually":[118],"compressing":[119],"structured":[121],"high":[122],"dimensional":[123],"representation":[124],"generated":[125],"block.":[129],"rely":[131],"probabilistic":[134],"parametrization":[135],"proposed":[138,186,206],"architecture":[140],"learn":[142],"model":[144],"using":[145],"variational":[147],"inference.":[148],"This":[149],"requires":[150],"evaluation":[151],"analytically":[154],"intractable":[155],"integral":[156],"defining":[157],"Kullback-Leibler":[159],"divergence":[160],"term":[161],"responsible":[162],"regularization,":[164],"we":[167],"propose":[168],"approximation":[171],"based":[172],"Gauss-Hermite":[175],"quadrature.":[176],"Our":[177],"empirical":[178],"results":[179],"demonstrate":[180],"superior":[182],"performance":[183],"approach":[187,202],"over":[188],"comparable":[189],"baselines":[191],"indicate":[193],"it":[195],"could":[196],"lead":[197],"Moreover,":[200],"outperforms":[203],"recently":[205],"robust":[214],"FBANK":[219],"features.":[220]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3195185666","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-04-18T06:07:56.294595","created_date":"2021-08-30"}