{"id":"https://openalex.org/W4291238569","doi":"https://doi.org/10.48550/arxiv.2208.05830","title":"Speech Enhancement and Dereverberation with Diffusion-based Generative Models","display_name":"Speech Enhancement and Dereverberation with Diffusion-based Generative Models","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4291238569","doi":"https://doi.org/10.48550/arxiv.2208.05830"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.05830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2208.05830","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087017732","display_name":"Julius Richter","orcid":"https://orcid.org/0000-0002-7870-4839"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richter, Julius","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037310428","display_name":"Simon Welker","orcid":"https://orcid.org/0000-0002-6349-8462"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Welker, Simon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006784595","display_name":"Jean-Marie Lemercier","orcid":"https://orcid.org/0000-0002-8704-7658"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lemercier, Jean-Marie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027458841","display_name":"Bunlong Lay","orcid":"https://orcid.org/0000-0002-0847-7896"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lay, Bunlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087022569","display_name":"Timo Gerkmann","orcid":"https://orcid.org/0000-0002-8678-4699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gerkmann, Timo","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.700951,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":70,"max":76},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10822","display_name":"Acoustic Wave Phenomena Research","score":0.986,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9813,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6936359},{"id":"https://openalex.org/keywords/formalism","display_name":"Formalism (music)","score":0.42520452}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7831209},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.69761735},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6936359},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5480086},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5298607},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.48611617},{"id":"https://openalex.org/C73301696","wikidata":"https://www.wikidata.org/wiki/Q5469984","display_name":"Formalism (music)","level":3,"score":0.42520452},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41621327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30144298},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2831416},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14525002},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.05830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.05830","pdf_url":"http://arxiv.org/pdf/2208.05830","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2208.05830","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2208.05830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.66,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4231424160","https://openalex.org/W4205463238","https://openalex.org/W3103844505","https://openalex.org/W3096184950","https://openalex.org/W259157601","https://openalex.org/W2275432853","https://openalex.org/W2153315159","https://openalex.org/W197907117","https://openalex.org/W1482209366"],"abstract_inverted_index":{"In":[0,137],"this":[1,92],"work,":[2],"we":[3,50,111,142,215],"build":[4],"upon":[5],"our":[6,73,134,186],"previous":[7],"publication":[8],"and":[9,34,68,154,180,207,226,236],"use":[10],"diffusion-based":[11],"generative":[12],"models":[13,153],"for":[14,166,196,224],"speech":[15,67,80,83,104,118],"enhancement.":[16],"We":[17,89,168],"present":[18],"a":[19,30,63,86,161,181],"detailed":[20],"overview":[21],"of":[22,41,65,133,210],"the":[23,54,108,117,123,127,130,145,170,198,205,211,218],"diffusion":[24,98],"process":[25,56,75,200],"that":[26,91,122,144,217],"is":[27,189,221],"based":[28],"on":[29,160],"stochastic":[31],"differential":[32],"equation":[33],"delve":[35],"into":[36],"an":[37,138,173],"extensive":[38,139],"theoretical":[39],"examination":[40],"its":[42],"implications.":[43],"Opposed":[44],"to":[45,81,100,114,203,230],"usual":[46],"conditional":[47],"generation":[48],"tasks,":[49],"do":[51],"not":[52,228],"start":[53],"reverse":[55,199],"from":[57,62,78],"pure":[58],"Gaussian":[59,69],"noise":[60,233],"but":[61],"mixture":[64],"noisy":[66,82,178],"noise.":[70],"This":[71],"matches":[72],"forward":[74],"which":[76,185],"moves":[77],"clean":[79,103],"by":[84],"including":[85],"drift":[87],"term.":[88],"show":[90,143,216],"procedure":[93],"enables":[94],"using":[95,176],"only":[96],"30":[97],"steps":[99],"generate":[101],"high-quality":[102],"estimates.":[105],"By":[106],"adapting":[107],"network":[109],"architecture,":[110],"are":[112,239],"able":[113],"significantly":[115],"improve":[116],"enhancement":[119],"performance,":[120],"indicating":[121],"network,":[124],"rather":[125],"than":[126,164],"formalism,":[128],"was":[129],"main":[131],"limitation":[132],"original":[135],"approach.":[136],"cross-dataset":[140],"evaluation,":[141],"improved":[146],"method":[147,188,220],"can":[148],"compete":[149],"with":[150,172],"recent":[151],"discriminative":[152],"achieves":[155],"better":[156],"generalization":[157],"when":[158],"evaluating":[159],"different":[162,193],"corpus":[163],"used":[165],"training.":[167],"complement":[169],"results":[171],"instrumental":[174],"evaluation":[175],"real-world":[177],"recordings":[179],"listening":[182],"experiment,":[183],"in":[184],"proposed":[187,212,219],"rated":[190],"best.":[191],"Examining":[192],"sampler":[194],"configurations":[195],"solving":[197],"allows":[201],"us":[202],"balance":[204],"performance":[206],"computational":[208],"speed":[209],"method.":[213],"Moreover,":[214],"also":[222],"suitable":[223],"dereverberation":[225],"thus":[227],"limited":[229],"additive":[231],"background":[232],"removal.":[234],"Code":[235],"audio":[237],"examples":[238],"available":[240],"online,":[241],"see":[242],"https://github.com/sp-uhh/sgmse":[243]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4291238569","counts_by_year":[{"year":2023,"cited_by_count":2}],"updated_date":"2024-12-17T08:42:23.511642","created_date":"2022-08-13"}