{"id":"https://openalex.org/W3206315738","doi":"https://doi.org/10.1109/taslp.2022.3140556","title":"Stepwise-Refining Speech Separation Network via Fine-Grained Encoding in High-Order Latent Domain","display_name":"Stepwise-Refining Speech Separation Network via Fine-Grained Encoding in High-Order Latent Domain","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W3206315738","doi":"https://doi.org/10.1109/taslp.2022.3140556","mag":"3206315738"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3140556","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.04791","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066643565","display_name":"Zengwei Yao","orcid":"https://orcid.org/0000-0002-2331-2387"},"institutions":[{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"funder","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zengwei Yao","raw_affiliation_strings":["Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078487642","display_name":"Wenjie Pei","orcid":"https://orcid.org/0000-0001-8117-2696"},"institutions":[{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"funder","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjie Pei","raw_affiliation_strings":["Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101523298","display_name":"Fanglin Chen","orcid":"https://orcid.org/0000-0002-9193-5412"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"funder","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fanglin Chen","raw_affiliation_strings":["Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213","https://openalex.org/I158809036"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030843117","display_name":"Guangming Lu","orcid":"https://orcid.org/0000-0003-1578-2634"},"institutions":[{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"funder","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Lu","raw_affiliation_strings":["Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Harbin Institute of Technology at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100325058","display_name":"David Zhang","orcid":"https://orcid.org/0000-0002-5027-5286"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"David Zhang","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.115,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":6,"citation_normalized_percentile":{"value":0.800254,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":84,"max":86},"biblio":{"volume":"30","issue":null,"first_page":"378","last_page":"393"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.52270615},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.4410111},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source Separation","score":0.42213097}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.659983},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.62146664},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.6107575},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.52270615},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48505726},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.45324647},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.4410111},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.42213097},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.42070535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3985523},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.21161419},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.19817626},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1419738},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3140556","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.04791","pdf_url":"https://arxiv.org/pdf/2110.04791","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2110.04791","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.04791","pdf_url":"https://arxiv.org/pdf/2110.04791","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.69,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62006060"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"U2013210"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62176077"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62002085"},{"funder":"https://openalex.org/F4320335803","funder_display_name":"Shenzhen Fundamental Research and Discipline Layout project","award_id":"JCYJ20210324132210025"},{"funder":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","award_id":"2019Bl515120055"},{"funder":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","award_id":"2021A1515012528"}],"datasets":[],"versions":["https://openalex.org/W3206315738"],"referenced_works_count":56,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1599512239","https://openalex.org/W1677182931","https://openalex.org/W1901129140","https://openalex.org/W2020997493","https://openalex.org/W2026369565","https://openalex.org/W2064675550","https://openalex.org/W2120847449","https://openalex.org/W2127851351","https://openalex.org/W2147768505","https://openalex.org/W2163605009","https://openalex.org/W2191779130","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2519091744","https://openalex.org/W2561557072","https://openalex.org/W2618530766","https://openalex.org/W2734774145","https://openalex.org/W2735663686","https://openalex.org/W2889540509","https://openalex.org/W2890820256","https://openalex.org/W2891405874","https://openalex.org/W2949382160","https://openalex.org/W2952218014","https://openalex.org/W2962715207","https://openalex.org/W2962788625","https://openalex.org/W2962866211","https://openalex.org/W2962935966","https://openalex.org/W2963403868","https://openalex.org/W2964058413","https://openalex.org/W2970971581","https://openalex.org/W2972460025","https://openalex.org/W2972541922","https://openalex.org/W2981436548","https://openalex.org/W2990666817","https://openalex.org/W2996969697","https://openalex.org/W2998657200","https://openalex.org/W3004940340","https://openalex.org/W3008880747","https://openalex.org/W3010851250","https://openalex.org/W3015191643","https://openalex.org/W3015199127","https://openalex.org/W3016066622","https://openalex.org/W3027008958","https://openalex.org/W3035268204","https://openalex.org/W3094280184","https://openalex.org/W3094607766","https://openalex.org/W3096893582","https://openalex.org/W3099330747","https://openalex.org/W3102190437","https://openalex.org/W3124972797","https://openalex.org/W3163652268","https://openalex.org/W3185109982","https://openalex.org/W4233392025"],"related_works":["https://openalex.org/W4385464961","https://openalex.org/W4308092240","https://openalex.org/W4292513318","https://openalex.org/W320684304","https://openalex.org/W2287611352","https://openalex.org/W2077498359","https://openalex.org/W2071676784","https://openalex.org/W2060903012","https://openalex.org/W2021161555","https://openalex.org/W1509352139"],"abstract_inverted_index":{"The":[0],"crux":[1],"of":[2,12,67,156,191,223,237],"single-channel":[3],"speech":[4,33,38,77,102,185,199,224,228,238],"separation":[5,34,45,119,138,179,200,239],"is":[6],"how":[7],"to":[8,43,48,94,128,161,175,180,233],"encode":[9],"the":[10,21,37,64,82,109,140,144,157,168,189,235],"mixture":[11],"signals":[13,22,39,229],"into":[14,40],"such":[15],"a":[16,50,56,96,117,124,136,148,163,177,182,202],"latent":[17,57,89,126,150,159,165],"embedding":[18,52,71,83],"space":[19,53,72,84,100,132],"that":[20,81],"from":[23],"different":[24],"speakers":[25],"can":[26],"be":[27],"precisely":[28],"separated.":[29],"Existing":[30],"methods":[31,68],"for":[32,76,101],"either":[35],"transform":[36],"frequency":[41],"domain":[42,58,90,127,151,160,166],"perform":[44,176,221],"or":[46],"seek":[47],"learn":[49],"separable":[51,98],"by":[54,86,194,230],"constructing":[55],"based":[59],"on":[60,206,215,226],"convolutional":[61],"filters.":[62],"While":[63],"latter":[65],"type":[66],"learning":[69],"an":[70,130],"achieves":[73],"substantial":[74],"improvement":[75],"separation,":[78],"we":[79,107,219],"argue":[80],"defined":[85],"only":[87],"one":[88],"does":[91],"not":[92],"suffice":[93],"provide":[95],"thoroughly":[97],"encoding":[99,131],"separation.":[103,186],"In":[104],"this":[105],"paper,":[106],"propose":[108],"Stepwise-Refining":[110],"Speech":[111],"Separation":[112],"Network":[113],"(SRSSN),":[114],"which":[115,171],"follows":[116],"coarse-to-fine":[118],"framework.":[120],"It":[121],"first":[122],"learns":[123,147],"1-order":[125],"define":[129],"and":[133],"thereby":[134],"performs":[135],"rough":[137],"in":[139,167,201,212],"coarse":[141],"phase.":[142],"Then":[143],"proposed":[145],"SRSSN":[146,193],"new":[149],"along":[152],"each":[153],"basis":[154],"function":[155],"existing":[158],"obtain":[162],"high-order":[164],"refining":[169,178],"phase,":[170],"enables":[172],"our":[173,192,231],"model":[174,232],"achieve":[181],"more":[183],"precise":[184],"We":[187],"demonstrate":[188],"effectiveness":[190],"conducting":[195],"extensive":[196],"experiments,":[197],"including":[198],"clean":[203],"(noise-free)":[204],"setting":[205],"WSJ0-2/3mix":[207],"datasets":[208],"as":[209,211],"well":[210],"noisy/reverberant":[213],"settings":[214],"WHAM!/WHAMR!":[216],"datasets.":[217],"Furthermore,":[218],"also":[220],"experiments":[222],"recognition":[225],"separated":[227],"evaluate":[234],"performance":[236],"indirectly.":[240]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3206315738","counts_by_year":[{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2025-04-24T01:11:12.302638","created_date":"2021-10-25"}