{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:05:44Z","timestamp":1730297144146,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10022488","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:54:03Z","timestamp":1674827643000},"page":"436-443","source":"Crossref","is-referenced-by-count":1,"title":["Spatial-DCCRN: DCCRN Equipped with Frame-Level Angle Feature and Hybrid Filtering for Multi-Channel Speech Enhancement"],"prefix":"10.1109","author":[{"given":"Shubo","family":"LV","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi'an,China"}]},{"given":"Yihui","family":"Fu","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi'an,China"}]},{"given":"Yukai","family":"JV","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi'an,China"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi'an,China"}]},{"given":"Weixin","family":"Zhu","sequence":"additional","affiliation":[{"name":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China"}]},{"given":"Wei","family":"Rao","sequence":"additional","affiliation":[{"name":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China"}]},{"given":"Yannan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1457"},{"key":"ref12","first-page":"260","article-title":"Fasnet: Low-latency adaptive beam-forming for multi-microphone audio processing","author":"luo","year":"2019","journal-title":"Proc ASRU"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2585878"},{"key":"ref15","first-page":"arxiv","article-title":"S-dccrn: Super wide band dccrn with learnable complex feature for speech enhancement","author":"lv","year":"2021","journal-title":"ArXiv e-prints"},{"key":"ref14","article-title":"End-to-end multi-channel speech separation","author":"gu","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref31","first-page":"776","article-title":"Audio set: An on-tology and human-labeled dataset for audio events","author":"gemmeke","year":"2017","journal-title":"Proc ICASSP"},{"key":"ref30","article-title":"Mu-san: A music, speech, and noise corpus","author":"snyder","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref11","article-title":"Embedding and beamforming: All-neural causal beamformer for multichannel speech enhance-ment","author":"li","year":"2021","journal-title":"ArXiv Preprint"},{"journal-title":"International Telecommunication Union Geneva","article-title":"P. 862. 2: Wideband extension to rec-ommendation p. 862 for the assessment of wideband telephone networks and speech codecs","year":"2005","key":"ref33"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404829"},{"key":"ref32","first-page":"12449","article-title":"wav2vec 2. 0: A framework for self-supervised learning of speech representations","volume":"33","author":"baevski","year":"2020","journal-title":"Ad-vances in Neural Information Processing Systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22482-4_11"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1482"},{"key":"ref16","first-page":"6629","article-title":"Densely con-nected neural network with dilated convolutions for real-time speech enhancement in the time domain","author":"pandey","year":"2020","journal-title":"Proc ICASSP"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108499"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP52302.2021.9596418"},{"key":"ref26","article-title":"Simultaneous measurement of impulse response and distortion with a swept-sine technique","author":"farina","year":"2000","journal-title":"Audio engineering society convention 108 Audio Engi-neering Society"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3133208"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6489"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1673"},{"key":"ref21","first-page":"4214","article-title":"A short-time objective intelligibility measure for time-frequency weighted noisy speech","author":"taal","year":"2010","journal-title":"Proc ICASSP"},{"key":"ref28","article-title":"Aishell-3: A multi-speaker mandarin tts corpus and the baselines","author":"shi","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2017.8384449"},{"journal-title":"Cstr vctk corpus English multi-speaker cor-pus for cstr voice cloning toolkit (version 0 92)","year":"2019","author":"yamagishi","key":"ref29"},{"key":"ref8","article-title":"Interspeech 2021 conferencingspeech challenge: Towards far-field multi-channel speech enhancement for video conferencing","author":"rao","year":"2021","journal-title":"Proc INTERSPEECH"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746872"},{"key":"ref9","first-page":"26","article-title":"A study of learning based beamforming methods for speech recognition","author":"xiao","year":"2016","journal-title":"CHiME 2016 workshop"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2266"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053833"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383604"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2881912"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2023,1,9]]},"location":"Doha, Qatar","end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10022488.pdf?arnumber=10022488","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T17:08:34Z","timestamp":1676912914000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10022488\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10022488","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}