{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T17:39:08Z","timestamp":1719941948873},"reference-count":54,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech & Language"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1016\/j.csl.2019.101033","type":"journal-article","created":{"date-parts":[[2019,11,13]],"date-time":"2019-11-13T17:01:43Z","timestamp":1573664503000},"page":"101033","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":14,"special_numbering":"C","title":["NEC-TT System for Mixed-Bandwidth and Multi-Domain Speaker Recognition"],"prefix":"10.1016","volume":"61","author":[{"given":"Kong Aik","family":"Lee","sequence":"first","affiliation":[]},{"given":"Hitoshi","family":"Yamamoto","sequence":"additional","affiliation":[]},{"given":"Koji","family":"Okabe","sequence":"additional","affiliation":[]},{"given":"Qiongqiong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ling","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Takafumi","family":"Koshinaka","sequence":"additional","affiliation":[]},{"given":"Jiacen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Koichi","family":"Shinoda","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2019.101033_bib0001","series-title":"Odyssey: The Speaker and Language Recognition Workshop","first-page":"176","article-title":"Speaker verification in mismatched conditions with frustratingly easy domain adaptation","author":"Alam","year":"2018"},{"issue":"2","key":"10.1016\/j.csl.2019.101033_bib0002","doi-asserted-by":"crossref","first-page":"356","DOI":"10.1109\/TASL.2011.2125954","article-title":"Speaker diarization: A review of recent research","volume":"20","author":"Anguera","year":"2012","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10.1016\/j.csl.2019.101033_bib0003","series-title":"Advances in Neural Information Processing Systems","first-page":"932","article-title":"A neural probabilistic language model","author":"Bengio","year":"2000"},{"key":"10.1016\/j.csl.2019.101033_bib0004","series-title":"Proc. Interspeech","first-page":"1517","article-title":"Deep speaker embeddings for short-duration speaker verification","author":"Bhattacharya","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0005","series-title":"Individual Differences in Speech Production and Perception","first-page":"255","article-title":"Forensic speaker recognition: mirages and reality","author":"Bonastre","year":"2015"},{"key":"10.1016\/j.csl.2019.101033_bib0006","series-title":"Odyssey: The Speaker and Language Recognition Workshop","first-page":"260","article-title":"Unsupervised domain adaptation for i-vector speaker recognition","author":"Br\u00fcmmer","year":"2014"},{"key":"10.1016\/j.csl.2019.101033_bib0007","article-title":"Attention-based models for text-dependent speaker verification","author":"Chowdhury","year":"2017","journal-title":"arXiv preprint arXiv:1710.10470"},{"key":"10.1016\/j.csl.2019.101033_bib0008","series-title":"Proc. Interspeech","first-page":"1086","article-title":"VoxCeleb2: Deep speaker recognition","author":"Chung","year":"2018"},{"issue":"9","key":"10.1016\/j.csl.2019.101033_bib0009","doi-asserted-by":"crossref","first-page":"1469","DOI":"10.1109\/TASLP.2015.2438544","article-title":"Data augmentation for deep neural network acoustic modeling","volume":"23","author":"Cui","year":"2015","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10.1016\/j.csl.2019.101033_bib0010","series-title":"Proceedings of NIST 2011 workshop","article-title":"Promoting robustness for speaker modeling in the community: the PRISM evaluation set","author":"Ferrer","year":"2011"},{"key":"10.1016\/j.csl.2019.101033_bib0011","series-title":"Proc. IEEE ICASSP","first-page":"4047","article-title":"Supervised domain adaptation for i-vector based speaker recognition","author":"Garcia-Romero","year":"2014"},{"issue":"6","key":"10.1016\/j.csl.2019.101033_bib0012","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1109\/MSP.2015.2462851","article-title":"Speaker recognition by machines and humans: a tutorial review","volume":"32","author":"Hansen","year":"2015","journal-title":"IEEE Signal Processing Magazine"},{"issue":"6","key":"10.1016\/j.csl.2019.101033_bib0013","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","article-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","volume":"29","author":"Hinton","year":"2012","journal-title":"IEEE Signal Processing Magazine"},{"key":"10.1016\/j.csl.2019.101033_bib0014","series-title":"Proc. of the 9th European Conference on Computer Vision - Volume Part IV","first-page":"531","article-title":"Probabilistic linear discriminant analysis","author":"Ioffe","year":"2006"},{"key":"10.1016\/j.csl.2019.101033_bib0015","series-title":"Proc. Interspeech","first-page":"2621","article-title":"Call my net corpus: A multilingual corpus for evaluation of speaker recognition technology","author":"Jones","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0016","series-title":"Odyssey: The Speaker and Language Recognition Workshop","article-title":"Bayesian speaker verification with heavy-tailed priors","author":"Kenny","year":"2010"},{"issue":"1","key":"10.1016\/j.csl.2019.101033_bib0017","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.specom.2009.08.009","article-title":"An overview of text-independent speaker recognition: from features to supervectors","volume":"52","author":"Kinnunen","year":"2010","journal-title":"Speech Communication"},{"issue":"1","key":"10.1016\/j.csl.2019.101033_bib0018","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/s13634-016-0306-6","article-title":"A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research","volume":"2016","author":"Kinoshita","year":"2016","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"key":"10.1016\/j.csl.2019.101033_bib0019","series-title":"Proc. IEEE ICASSP","first-page":"5220","article-title":"A study on data augmentation of reverberant speech for robust speech recognition","author":"Ko","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0020","series-title":"Proc. Interspeech","first-page":"1497","article-title":"I4U submission to NIST SRE 2018: Leveraging from a decade of shared experiences","author":"Lee","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0021","article-title":"Speaker verification makes its debut in smartphone","author":"Lee","year":"2013","journal-title":"IEEE Signal Processing Society Speech and language Technical Committee Newsletter"},{"key":"10.1016\/j.csl.2019.101033_bib0022","series-title":"Proc. IEEE ICASSP","first-page":"5821","article-title":"The CORAL+ algorithm for unsupervised domain adaptation of PLDA","author":"Lee","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0023","article-title":"The NEC-TT speaker verification system for SRE18","author":"Lee","year":"2018","journal-title":"NIST SRE 2018 Workshop"},{"key":"10.1016\/j.csl.2019.101033_bib0024","series-title":"Proc. Interspeech","first-page":"4355","article-title":"The NEC-TT 2018 speaker verification system","author":"Lee","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0025","series-title":"Proc. IEEE SLT Workshop","first-page":"131","article-title":"Improving wideband speech recognition using mixed-bandwidth training data in CDDNN-HMM","author":"Li","year":"2012"},{"key":"10.1016\/j.csl.2019.101033_bib0026","series-title":"Proc. Interspeech","first-page":"2575","article-title":"DNN-based speech bandwidth expansion and its application to adding high-frequency missing features for automatic speech recognition of narrowband speech","author":"Li","year":"2015"},{"key":"10.1016\/j.csl.2019.101033_bib0027","series-title":"Proc. Interspeech","doi-asserted-by":"crossref","first-page":"818","DOI":"10.21437\/Interspeech.2016-1129","article-title":"The speakers in the wild (sitw) speaker recognition database","author":"McLaren","year":"2016"},{"key":"10.1016\/j.csl.2019.101033_bib0028","series-title":"Advances in Neural Information Processing Systems","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"Mikolov","year":"2013"},{"key":"10.1016\/j.csl.2019.101033_bib0029","series-title":"Proc. Interspeech","first-page":"2616","article-title":"Voxceleb: A large-scale speaker identification dataset","author":"Nagrani","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0030","series-title":"Proc. Interspeech","first-page":"1111","article-title":"Investigation on bandwidth extension for speaker recognition","author":"Nidadavolu","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0031","article-title":"NIST 2018 Speaker Recognition Evaluation Plan","author":"National Institute of Standards","year":"2018","journal-title":"NIST SRE"},{"key":"10.1016\/j.csl.2019.101033_bib0032","series-title":"Proc. Interspeech","first-page":"2252","article-title":"Attentive statistics pooling for deep speaker embedding","author":"Okabe","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0033","series-title":"Proc. Interspeech","first-page":"3214","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"Peddinti","year":"2015"},{"key":"10.1016\/j.csl.2019.101033_bib0034","series-title":"Proc. ICCV","first-page":"1","article-title":"Probabilistic linear discriminant analysis for inferences about identity","author":"Prince","year":"2007"},{"key":"10.1016\/j.csl.2019.101033_bib0035","series-title":"CVPR","first-page":"815","article-title":"FaceNet: A unified embedding for face recognition and clustering","author":"Schroff","year":"2015"},{"key":"10.1016\/j.csl.2019.101033_bib0036","series-title":"Proc. IEEE SLT Workshop","first-page":"413","article-title":"Speaker diarization with PLDA i-vector scoring and unsupervised calibration","author":"Sell","year":"2014"},{"key":"10.1016\/j.csl.2019.101033_bib0037","series-title":"Proc. Interspeech","first-page":"72","article-title":"Fast variational bayes for heavy-tailed plda applied to i-vectors and x-vectors","author":"Silnova","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0038","series-title":"arXiv:1510.08484","article-title":"MUSAN: a music, speech, and noise corpus","author":"Snyder","year":"2015"},{"key":"10.1016\/j.csl.2019.101033_bib0039","series-title":"Proc. Interspeech","first-page":"999","article-title":"Deep neural network embeddings for text-independent speaker verification","author":"Snyder","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0040","series-title":"Proc. IEEE ICASSP","first-page":"5329","article-title":"X-vectors: Robust DNN embeddings for speaker recognition","author":"Snyder","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0041","series-title":"Proc. IEEE SLT Workshop","first-page":"165","article-title":"Deep neural network-based speaker embeddings for end-to-end speaker verification","author":"Snyder","year":"2016"},{"key":"10.1016\/j.csl.2019.101033_bib0042","unstructured":"SoX \u2013 Sound eXchange http:\/\/sox.sourceforge.net\/."},{"key":"10.1016\/j.csl.2019.101033_bib0043","series-title":"Linear Algebra and Learning from Data","author":"Strang","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0044","series-title":"Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence (AAAI-16)","first-page":"2058","article-title":"Return of frustratingly easy domain adaptation","author":"Sun","year":"2016"},{"key":"10.1016\/j.csl.2019.101033_bib0045","series-title":"Proc. Eleventh International Conference on Language Resources and Evaluation (LREC)","first-page":"4318","article-title":"Vast: A corpus of video annotation for speech technologies","author":"Tracey","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0046","series-title":"Proc. IEEE ICASSP","first-page":"4052","article-title":"Deep neural networks for small footprint text-dependent speaker verification","author":"Variani","year":"2014"},{"key":"10.1016\/j.csl.2019.101033_bib0047","series-title":"Advances in Neural Information Processing Systems","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0048","series-title":"Proc. Interspeech","first-page":"1488","article-title":"State-of-the-art speaker recognition for telephone and video speech: the JHU-MIT submission for NIST SRE18","author":"Villalba","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0049","article-title":"The JHU-MIT system description for NIST SRE18","author":"Villalba","year":"2018","journal-title":"NIST SRE 2018 Workshop"},{"key":"10.1016\/j.csl.2019.101033_bib0050","series-title":"Proc. IEEE SLT Workshop","first-page":"1052","article-title":"Attention mechanism in speaker recognition: What does it learn in deep speaker embedding?","author":"Wang","year":"2018"},{"key":"10.1016\/j.csl.2019.101033_bib0051","series-title":"Proc. Interspeech","first-page":"1497","article-title":"What does the speaker embedding encode?","author":"Wang","year":"2017"},{"key":"10.1016\/j.csl.2019.101033_bib0052","series-title":"Proc. Interspeech","first-page":"406","article-title":"Speaker augmentation and bandwidth extension for deep speaker embedding","author":"Yamamoto","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0053","series-title":"Proc. IEEE ICASSP","first-page":"6141","article-title":"How to improve your speaker embeddings extractor in generic toolkits","author":"Zeinali","year":"2019"},{"key":"10.1016\/j.csl.2019.101033_bib0054","series-title":"Proc. Interspeech","first-page":"1487","article-title":"End-to-end text-independent speaker verification with triplet loss on short utterances","author":"Zhang","year":"2017"}],"container-title":["Computer Speech & Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230819302773?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230819302773?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,1,10]],"date-time":"2020-01-10T11:12:47Z","timestamp":1578654767000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230819302773"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":54,"alternative-id":["S0885230819302773"],"URL":"https:\/\/doi.org\/10.1016\/j.csl.2019.101033","relation":{},"ISSN":["0885-2308"],"issn-type":[{"value":"0885-2308","type":"print"}],"subject":[],"published":{"date-parts":[[2020,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"NEC-TT System for Mixed-Bandwidth and Multi-Domain Speaker Recognition","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.csl.2019.101033","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2019 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"101033"}}