{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T19:48:54Z","timestamp":1730231334710,"version":"3.28.0"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1109\/icasspw59220.2023.10193575","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:54Z","timestamp":1690997454000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Pa\u15e7-HuBERT: Self-Supervised Music Source Separation Via Primitive Auditory Clustering And Hidden-Unit Bert"],"prefix":"10.1109","author":[{"given":"Ke","family":"Chen","sequence":"first","affiliation":[{"name":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"}]},{"given":"Gordon","family":"Wichern","sequence":"additional","affiliation":[{"name":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"}]},{"given":"Fran\u00e7ois G.","family":"Germain","sequence":"additional","affiliation":[{"name":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"}]},{"given":"Jonathan","family":"Le Roux","sequence":"additional","affiliation":[{"name":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475576"},{"key":"ref35","article-title":"Music source separation in the waveform domain","volume":"abs 1911 13254","author":"d\u00e9fossez","year":"2019","journal-title":"CoRR"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747669"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21105\/joss.02154"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3067635"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052942"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414405"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref31","first-page":"342","article-title":"Decoupling magnitude and phase estimation with deep resunet for music source separation","author":"kong","year":"2021","journal-title":"Proc ISMIR"},{"key":"ref30","first-page":"1","article-title":"KUIELab-MDXNet: A two-stream neural network for music demixing","author":"kim","year":"2021","journal-title":"Proc MDX Workshop"},{"key":"ref11","first-page":"125","article-title":"HEAR: Holistic evaluation of audio representations","author":"turian","year":"2022","journal-title":"Proc 2021 Compet Demonstrations Track (NeurIPS)"},{"key":"ref33","article-title":"Hybrid transformers for music source separation","author":"rouard","year":"2022","journal-title":"arXiv preprint arXiv 2211 08553"},{"key":"ref10","article-title":"BEATs: Audio pre-training with acoustic tokenizers","author":"chen","year":"2022","journal-title":"arXiv preprint arXiv 2212 09058"},{"key":"ref32","article-title":"Music source separation with band-split RNN","volume":"abs 2209 15174","author":"luo","year":"2022","journal-title":"CoRR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2019.2918706"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3389\/frsip.2021.808395"},{"key":"ref17","first-page":"451","article-title":"Exploring WavLM on speech enhancement","author":"song","year":"2022","journal-title":"Proc SLT"},{"key":"ref39","article-title":"Separation of a monaural audio signal into harmonic\/percussive components by complementary diffusion on spectrogram","author":"ono","year":"2008","journal-title":"Proc EUSIPCO"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref38","article-title":"The Northwestern University source separation library","author":"manilow","year":"2018","journal-title":"Proc ISMIR"},{"key":"ref19","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc NeurIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPAASC55919.2022.9980218"},{"key":"ref24","article-title":"Empirical derivation of acoustic grouping cues from natural sound statistics","author":"mcdermott","year":"2011","journal-title":"Proc Assoc Res Otolaryngol Annu Meet"},{"key":"ref46","first-page":"293","article-title":"The 2018 signal separation evaluation campaign","author":"st\u00f6ter","year":"2018","journal-title":"Proc LVA\/ICA"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2188515"},{"key":"ref45","article-title":"D3Net: Densely connected multidilated densenet for music source separation","author":"takahashi","year":"2021","journal-title":"arXiv preprint arXiv 2211 08553"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"ref25","article-title":"Bootstrapping unsupervised deep music separation from primitive auditory grouping principles","author":"seetharaman","year":"2020","journal-title":"Proc Workshop Self-Superv Audio Speech"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8169990"},{"key":"ref42","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"0","journal-title":"Proc NAACL-HLT 2019"},{"key":"ref41","first-page":"316","article-title":"FMA: A dataset for music analysis","author":"defferrard","year":"2017","journal-title":"Proc ISMIR"},{"key":"ref22","first-page":"611","article-title":"Extending harmonic-percussive separation of audio signals","author":"driedger","year":"2014","journal-title":"Proc ISMIR"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01667"},{"key":"ref21","first-page":"71","article-title":"Repeating pattern extraction technique (REPET): A simple method for music\/voice separation","volume":"21","author":"rafii","year":"2013","journal-title":"IEEE Trans Speech Audio Process"},{"key":"ref43","article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","author":"goyal","year":"2017","journal-title":"arXiv preprint arXiv 1706 02677"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"ref27","first-page":"334","article-title":"Wave-U-Net: A multi-scale neural network for end-to-end audio source separation","author":"stoller","year":"2018","journal-title":"Proc ISMIR"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746312"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21315"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10766"},{"key":"ref9","article-title":"Masked autoencoders that listen","author":"huang","year":"2022","journal-title":"Proc NeurIPS"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2021.3134634"},{"article-title":"The MUSDB18 corpus for music separation","year":"2017","author":"rafii","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-2013"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref40","first-page":"583","article-title":"Music\/voice separation using the similarity matrix","author":"rafii","year":"2012","journal-title":"Proc ISMIR"}],"event":{"name":"2023 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","start":{"date-parts":[[2023,6,4]]},"location":"Rhodes Island, Greece","end":{"date-parts":[[2023,6,10]]}},"container-title":["2023 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10192576\/10192577\/10193575.pdf?arnumber=10193575","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:42:40Z","timestamp":1692639760000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10193575\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icasspw59220.2023.10193575","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]}}}