{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:05:42Z","timestamp":1730297142561,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T00:00:00Z","timestamp":1673222400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,9]]},"DOI":"10.1109\/slt54892.2023.10022470","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:54:03Z","timestamp":1674827643000},"page":"599-604","source":"Crossref","is-referenced-by-count":6,"title":["A Comprehensive Study on Self-Supervised Distillation for Speaker Representation Learning"],"prefix":"10.1109","author":[{"given":"Zhengyang","family":"Chen","sequence":"first","affiliation":[{"name":"AI Institute, Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, X-LANCE Lab,Department of Computer Science and Engineering"}]},{"given":"Yao","family":"Qian","sequence":"additional","affiliation":[{"name":"Microsoft Cognitive Services Research,USA"}]},{"given":"Bing","family":"Han","sequence":"additional","affiliation":[{"name":"AI Institute, Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, X-LANCE Lab,Department of Computer Science and Engineering"}]},{"given":"Yanmin","family":"Qian","sequence":"additional","affiliation":[{"name":"AI Institute, Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, X-LANCE Lab,Department of Computer Science and Engineering"}]},{"given":"Michael","family":"Zeng","sequence":"additional","affiliation":[{"name":"Microsoft Cognitive Services Research,USA"}]}],"member":"263","reference":[{"key":"ref13","first-page":"21271","article-title":"Bootstrap your own latent-a new approach to self-supervised learning","volume":"33","author":"grill","year":"2020","journal-title":"NeurIPS"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747526"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-126"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.76.036102"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413351"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414973"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"999","DOI":"10.21437\/Interspeech.2017-620","article-title":"Deep neural network embeddings for text-independent speaker verification","author":"snyder","year":"2017","journal-title":"InterSpeech"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2015.07.003"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2842"},{"key":"ref16","article-title":"Self-supervised curriculum learning for speaker verification","author":"heo","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1508"},{"key":"ref18","first-page":"9729","article-title":"Momentum contrast for unsupervised visual rep-resentation learning","author":"he","year":"2020","journal-title":"Proc IEEE CVPR 2022"},{"key":"ref24","article-title":"MU-SAN: A Music, Speech, and Noise Corpus","author":"snyder","year":"2015","journal-title":"ArXiv"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"ref26","article-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks","volume":"29","author":"salimans","year":"2016","journal-title":"NIPS"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"2616","DOI":"10.21437\/Interspeech.2017-950","article-title":"Voxceleb: A large-scale speaker identification dataset","author":"nagrani","year":"2017","journal-title":"InterSpeech"},{"key":"ref20","article-title":"The dku-dukeece systems for voxceleb speaker recognition chal-lenge 2020","author":"wang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref22","first-page":"arxiv","article-title":"The sjtu x-lance lab system for cnsrc 2022","author":"chen","year":"2022","journal-title":"ArXiv e-prints"},{"key":"ref21","article-title":"The speakin system for voxceleb speaker recognition challange 2021","author":"zhao","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747162"},{"key":"ref27","article-title":"Unsupervised representation learning for speaker recognition via contrastive equilibrium learning","author":"mun","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref29","first-page":"6147","article-title":"Large-scale self-supervised speech representation learning for auto-matic speaker verification","author":"chen","year":"2022","journal-title":"Proc IEEE ICASSP 2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPAASC47483.2019.9023039"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"ref9","article-title":"Augmentation adversarial training for unsu-pervised speaker recognition","author":"huh","year":"2020","journal-title":"Workshop on Self-Supervised Learning for Speech and Audio Processing NeurIPS"},{"key":"ref4","article-title":"But system description to vox-celeb speaker recognition challenge 2019","author":"zeinali","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref3","first-page":"5329","article-title":"X-vectors: Robust dnn em-beddings for speaker recognition","author":"snyder","year":"2018","journal-title":"Proc IEEE ICASSP 2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462665"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2650"}],"event":{"name":"2022 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2023,1,9]]},"location":"Doha, Qatar","end":{"date-parts":[[2023,1,12]]}},"container-title":["2022 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022052\/10022330\/10022470.pdf?arnumber=10022470","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T17:08:41Z","timestamp":1676912921000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10022470\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,9]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/slt54892.2023.10022470","relation":{},"subject":[],"published":{"date-parts":[[2023,1,9]]}}}