{"id":"https://openalex.org/W4372270323","doi":"https://doi.org/10.1109/icassp49357.2023.10096304","title":"Improving Speech Enhancement via Event-Based Query","display_name":"Improving Speech Enhancement via Event-Based Query","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372270323","doi":"https://doi.org/10.1109/icassp49357.2023.10096304"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096304","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/icassp49357.2023.10096304","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063679203","display_name":"Yifei Xin","orcid":"https://orcid.org/0000-0001-7792-6352"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifei Xin","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024977056","display_name":"Xiulian Peng","orcid":"https://orcid.org/0000-0001-8213-4878"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiulian Peng","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100756584","display_name":"Yan Lu","orcid":"https://orcid.org/0000-0001-5383-6424"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Lu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.413,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.601087,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":84,"max":88},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8311384},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.65189373},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5833373},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5751059},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.56578124},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5234962},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.46744597},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.43583283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41595286},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.13406211},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096304","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.11558","pdf_url":"https://arxiv.org/pdf/2302.11558","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096304","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.57,"display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W2067191022","https://openalex.org/W2094721231","https://openalex.org/W2133564696","https://openalex.org/W2144404214","https://openalex.org/W2593116425","https://openalex.org/W2962866211","https://openalex.org/W2973016613","https://openalex.org/W3094550259","https://openalex.org/W3096408984","https://openalex.org/W3113290170","https://openalex.org/W3138516171","https://openalex.org/W3160971452","https://openalex.org/W3161950572","https://openalex.org/W3185109982","https://openalex.org/W3196974791","https://openalex.org/W3198543387","https://openalex.org/W4225302959","https://openalex.org/W4225905067","https://openalex.org/W4226442948","https://openalex.org/W4283803134","https://openalex.org/W4289242435","https://openalex.org/W4297841486"],"related_works":["https://openalex.org/W4252682934","https://openalex.org/W3110551121","https://openalex.org/W2955597484","https://openalex.org/W2747006289","https://openalex.org/W2653598178","https://openalex.org/W2294333436","https://openalex.org/W2120771489","https://openalex.org/W2089240210","https://openalex.org/W2072884270","https://openalex.org/W2051376034"],"abstract_inverted_index":{"Existing":[0],"deep":[1],"learning":[2],"based":[3],"speech":[4,27,34,52,57,78,84,90,99],"enhancement":[5],"(SE)":[6],"methods":[7],"either":[8],"use":[9],"blind":[10],"end-to-end":[11],"training":[12],"or":[13,18],"explicitly":[14],"incorporate":[15],"speaker":[16],"embedding":[17],"phonetic":[19],"information":[20],"into":[21,75],"the":[22,64,94,131,141],"SE":[23,95,109],"network":[24],"to":[25,92,107,147],"enhance":[26],"quality.":[28],"In":[29],"this":[30],"paper,":[31],"we":[32],"perceive":[33],"and":[35,43,105,111,119,140],"noises":[36,59],"as":[37],"different":[38,108,148],"types":[39],"of":[40],"sound":[41,65],"events":[42],"propose":[44],"an":[45],"event-based":[46],"query":[47],"method":[48,133],"for":[49,124],"SE.":[50],"Specifically,":[51],"embeddings":[53,71],"that":[54,130],"can":[55,101],"discriminate":[56],"from":[58],"are":[60,72,144],"first":[61],"pre-trained":[62],"with":[63,138],"event":[66],"detection":[67],"(SED)":[68],"task.":[69],"The":[70,97],"then":[73],"clustered":[74],"fixed":[76],"golden":[77,98,142],"queries,":[79],"i.e.,":[80],"general":[81],"but":[82],"representative":[83],"embeddings,":[85],"on":[86],"a":[87],"diverse":[88],"clean":[89],"dataset":[91],"assist":[93],"network.":[96],"queries":[100,143],"be":[102],"obtained":[103],"offline":[104],"generalizable":[106],"datasets":[110],"networks.":[112],"Therefore,":[113],"little":[114],"extra":[115],"complexity":[116],"is":[117,122],"introduced":[118],"no":[120],"enrollment":[121],"needed":[123],"each":[125],"speaker.":[126],"Experimental":[127],"results":[128],"show":[129],"proposed":[132],"yields":[134],"significant":[135],"gains":[136],"compared":[137],"baselines":[139],"well":[145],"generalized":[146],"datasets.":[149]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4372270323","counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2024-12-17T08:28:30.980867","created_date":"2023-05-07"}