{"id":"https://openalex.org/W4380434618","doi":"https://doi.org/10.1109/taslp.2023.3285241","title":"Speech Enhancement and Dereverberation With Diffusion-Based Generative Models","display_name":"Speech Enhancement and Dereverberation With Diffusion-Based Generative Models","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4380434618","doi":"https://doi.org/10.1109/taslp.2023.3285241"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3285241","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2208.05830","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087017732","display_name":"Julius Richter","orcid":"https://orcid.org/0000-0002-7870-4839"},"institutions":[],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Julius Richter","raw_affiliation_strings":["Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037310428","display_name":"Simon Welker","orcid":"https://orcid.org/0000-0002-6349-8462"},"institutions":[],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Simon Welker","raw_affiliation_strings":["Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006784595","display_name":"Jean-Marie Lemercier","orcid":"https://orcid.org/0000-0002-8704-7658"},"institutions":[],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jean-Marie Lemercier","raw_affiliation_strings":["Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027458841","display_name":"Bunlong Lay","orcid":"https://orcid.org/0000-0002-0847-7896"},"institutions":[],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bunlong Lay","raw_affiliation_strings":["Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087022569","display_name":"Timo Gerkmann","orcid":"https://orcid.org/0000-0002-8678-4699"},"institutions":[],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Timo Gerkmann","raw_affiliation_strings":["Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Signal Processing Group, Department of Informatics, Universität Hamburg, Hamburg, Germany","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":32.495,"has_fulltext":false,"cited_by_count":77,"citation_normalized_percentile":{"value":0.999802,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"31","issue":null,"first_page":"2351","last_page":"2364"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.686748},{"id":"https://openalex.org/keywords/formalism","display_name":"Formalism (music)","score":0.4477468}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77023005},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.686748},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.6428287},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5422306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5135937},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.49147022},{"id":"https://openalex.org/C73301696","wikidata":"https://www.wikidata.org/wiki/Q5469984","display_name":"Formalism (music)","level":3,"score":0.4477468},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41562667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3112136},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.26698017},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15966871},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3285241","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.05830","pdf_url":"http://arxiv.org/pdf/2208.05830","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.05830","pdf_url":"http://arxiv.org/pdf/2208.05830","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false},"sustainable_development_goals":[{"score":0.65,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"grants":[{"funder":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":78,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1901129140","https://openalex.org/W1983108229","https://openalex.org/W1991111872","https://openalex.org/W2012770786","https://openalex.org/W2013035813","https://openalex.org/W2109744426","https://openalex.org/W2129069237","https://openalex.org/W2144144709","https://openalex.org/W2147166770","https://openalex.org/W2289394825","https://openalex.org/W2291877678","https://openalex.org/W2516001803","https://openalex.org/W2603567530","https://openalex.org/W2606943906","https://openalex.org/W2763188033","https://openalex.org/W2765833400","https://openalex.org/W2766672686","https://openalex.org/W2774389566","https://openalex.org/W2791686384","https://openalex.org/W2883322837","https://openalex.org/W2885308148","https://openalex.org/W2913314773","https://openalex.org/W2949756029","https://openalex.org/W2952716587","https://openalex.org/W2959300817","https://openalex.org/W2962866211","https://openalex.org/W2963341071","https://openalex.org/W2963828919","https://openalex.org/W2964058413","https://openalex.org/W2972436155","https://openalex.org/W3004970274","https://openalex.org/W3011982609","https://openalex.org/W3025844872","https://openalex.org/W3026111682","https://openalex.org/W3035574324","https://openalex.org/W3036167779","https://openalex.org/W3036601975","https://openalex.org/W3093931768","https://openalex.org/W3097549261","https://openalex.org/W3097627357","https://openalex.org/W3097756030","https://openalex.org/W3097906045","https://openalex.org/W3099330747","https://openalex.org/W3105013723","https://openalex.org/W3110257065","https://openalex.org/W3123097577","https://openalex.org/W3130335839","https://openalex.org/W3131332223","https://openalex.org/W3160567113","https://openalex.org/W3162926177","https://openalex.org/W3169386841","https://openalex.org/W3174264304","https://openalex.org/W3184410885","https://openalex.org/W3191448984","https://openalex.org/W3197912330","https://openalex.org/W3207551191","https://openalex.org/W3213188934","https://openalex.org/W3217536461","https://openalex.org/W4221143458","https://openalex.org/W4221144097","https://openalex.org/W4221145205","https://openalex.org/W4225302959","https://openalex.org/W4225566824","https://openalex.org/W4226021997","https://openalex.org/W4281820413","https://openalex.org/W4281969232","https://openalex.org/W4286850199","https://openalex.org/W4288574863","https://openalex.org/W4297841790","https://openalex.org/W4298289240","https://openalex.org/W4307199125","https://openalex.org/W4372268522","https://openalex.org/W4372341094","https://openalex.org/W4385245566","https://openalex.org/W4391602018","https://openalex.org/W4393689014"],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4231424160","https://openalex.org/W4205463238","https://openalex.org/W3103844505","https://openalex.org/W3096184950","https://openalex.org/W2965546495","https://openalex.org/W2761785940","https://openalex.org/W259157601","https://openalex.org/W2153315159","https://openalex.org/W2110523656"],"abstract_inverted_index":{"In":[0,137],"this":[1,92],"work,":[2],"we":[3,50,111,142,215],"build":[4],"upon":[5],"our":[6,73,134,186],"previous":[7],"publication":[8],"and":[9,34,68,154,180,207,226,236],"use":[10],"diffusion-based":[11],"generative":[12],"models":[13,153],"for":[14,166,196,224],"speech":[15,67,80,83,104,118],"enhancement.":[16],"We":[17,89,168],"present":[18],"a":[19,30,63,86,161,181],"detailed":[20],"overview":[21],"of":[22,41,65,133,210],"the":[23,54,108,117,123,127,130,145,170,198,205,211,218],"diffusion":[24,98],"process":[25,56,75,200],"that":[26,91,122,144,217],"is":[27,189,221],"based":[28],"on":[29,160],"stochastic":[31],"differential":[32],"equation":[33],"delve":[35],"into":[36],"an":[37,138,173],"extensive":[38,139],"theoretical":[39],"examination":[40],"its":[42],"implications.":[43],"Opposed":[44],"to":[45,81,100,114,203,230],"usual":[46],"conditional":[47],"generation":[48],"tasks,":[49],"do":[51],"not":[52,228],"start":[53],"reverse":[55,199],"from":[57,62,78],"pure":[58],"Gaussian":[59,69],"noise":[60,233],"but":[61],"mixture":[64],"noisy":[66,82,178],"noise.":[70],"This":[71],"matches":[72],"forward":[74],"which":[76,185],"moves":[77],"clean":[79,103],"by":[84],"including":[85],"drift":[87],"term.":[88],"show":[90,143,216],"procedure":[93],"enables":[94],"using":[95,176],"only":[96],"30":[97],"steps":[99],"generate":[101],"high-quality":[102],"estimates.":[105],"By":[106],"adapting":[107],"network":[109],"architecture,":[110],"are":[112,239],"able":[113],"significantly":[115],"improve":[116],"enhancement":[119],"performance,":[120],"indicating":[121],"network,":[124],"rather":[125],"than":[126,164],"formalism,":[128],"was":[129],"main":[131],"limitation":[132],"original":[135],"approach.":[136],"cross-dataset":[140],"evaluation,":[141],"improved":[146],"method":[147,188,220],"can":[148],"compete":[149],"with":[150,172],"recent":[151],"discriminative":[152],"achieves":[155],"better":[156],"generalization":[157],"when":[158],"evaluating":[159],"different":[162,193],"corpus":[163],"used":[165],"training.":[167],"complement":[169],"results":[171],"instrumental":[174],"evaluation":[175],"real-world":[177],"recordings":[179],"listening":[182],"experiment,":[183],"in":[184],"proposed":[187,212,219],"rated":[190],"best.":[191],"Examining":[192],"sampler":[194],"configurations":[195],"solving":[197],"allows":[201],"us":[202],"balance":[204],"performance":[206],"computational":[208],"speed":[209],"method.":[213],"Moreover,":[214],"also":[222],"suitable":[223],"dereverberation":[225],"thus":[227],"limited":[229],"additive":[231],"background":[232],"removal.":[234],"Code":[235],"audio":[237],"examples":[238],"available":[240],"online":[241],"1":[244],"https://github.com/sp-uhh/sgmse":[247],".":[248]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4380434618","counts_by_year":[{"year":2024,"cited_by_count":49},{"year":2023,"cited_by_count":24},{"year":2022,"cited_by_count":4}],"updated_date":"2024-12-31T00:12:22.581802","created_date":"2023-06-14"}