{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T02:40:06Z","timestamp":1723084806686},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T00:00:00Z","timestamp":1592611200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T00:00:00Z","timestamp":1592611200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Qual User Exp"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s41233-020-00036-z","type":"journal-article","created":{"date-parts":[[2020,6,20]],"date-time":"2020-06-20T04:02:42Z","timestamp":1592625762000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["On the use of the i-vector speech representation for instrumental quality measurement"],"prefix":"10.1007","volume":"5","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-3088-5116","authenticated-orcid":false,"given":"Anderson R.","family":"Avila","sequence":"first","affiliation":[]},{"given":"Jahangir","family":"Alam","sequence":"additional","affiliation":[]},{"given":"Douglas","family":"O\u2019Shaughnessy","sequence":"additional","affiliation":[]},{"given":"Tiago H.","family":"Falk","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,6,20]]},"reference":[{"issue":"1","key":"36_CR1","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1109\/MCOM.2015.7010539","volume":"53","author":"D Wu","year":"2015","unstructured":"Wu D et al (2015) Millimeter-wave multimedia communications: challenges, methodology, and applications. IEEE Commun Mag 53(1):232\u2013238","journal-title":"IEEE Commun Mag"},{"key":"36_CR2","unstructured":"ITU-T. Recommendation P.800 (1998) Methods for subjectiuve determination of transmission quality"},{"issue":"6","key":"36_CR3","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/MSP.2011.942469","volume":"28","author":"S Moller","year":"2006","unstructured":"Moller S et al (2006) Speech quality estimation: models and trends. IEEE Signal Process Mag 28(6):18\u201328","journal-title":"IEEE Signal Process Mag"},{"key":"36_CR4","doi-asserted-by":"crossref","unstructured":"Avila A R et al (2016) Performance comparison of intrusive and non-intrusive instrumental quality measures for enhanced speech. IWAENC","DOI":"10.1109\/IWAENC.2016.7602907"},{"key":"36_CR5","unstructured":"ITU-T. Recommendation P.862 (2001) Perceptual evaluation of speech quality (PESQ), an objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs"},{"key":"36_CR6","unstructured":"ITU-T (2007) Recommendation P.862.2: Wideband extension to recommendation p. 862 for the assessment of wideband telephone networks and speech codecs"},{"key":"36_CR7","unstructured":"Recommendation P.863"},{"key":"36_CR8","unstructured":"ITU-T. Recommendation P.863 (2018) Perceptual objective listening quality prediction: telephone transmission quality, telephone installation, local line networks\u2013methods for objective and subjective assessment of speech quality"},{"issue":"2","key":"36_CR9","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1109\/MSP.2014.2358871","volume":"32","author":"TH Falk","year":"2015","unstructured":"Falk TH et al (2015) Objective quality and intelligibility prediction for users of assistive listening devices: advantages and limitations of existing tools. IEEE Signal Process Mag 32(2):114\u2013124","journal-title":"IEEE Signal Process Mag"},{"issue":"6","key":"36_CR10","doi-asserted-by":"publisher","first-page":"1924","DOI":"10.1109\/TASL.2006.883177","volume":"14","author":"L Malfait","year":"2006","unstructured":"Malfait L, Berger J, Kastner M (2006) P. 563 the ITU-T standard for single-ended speech quality assessment. IEEE Trans Audio Speech Lang Process 14(6):1924\u20131934","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"36_CR11","doi-asserted-by":"crossref","unstructured":"Avila A R et al (2016) Performance comparison of intrusive and non-intrusive instrumental quality measures for enhanced speech. In: 2016 IEEE international workshop on acoustic signal enhancement (IWAENC), pp 1\u20135. IEEE","DOI":"10.1109\/IWAENC.2016.7602907"},{"key":"36_CR12","doi-asserted-by":"crossref","unstructured":"Avila A R et al (2019) Non-intrusive speech quality assessment using neural networks. In: International conference on acoustics, speech and signal processing (ICASSP), pp. 631\u2013635. IEEE,","DOI":"10.1109\/ICASSP.2019.8683175"},{"key":"36_CR13","doi-asserted-by":"crossref","unstructured":"Avila AR et al (2019) Intrusive quality measurement of noisy and enhanced speech based on i-vector similarity. In: 2019 Eleventh international conference on quality of multimedia experience (QoMEX), pp 1\u20135. IEEE","DOI":"10.1109\/QoMEX.2019.8743285"},{"issue":"1","key":"36_CR14","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/TASL.2009.2023679","volume":"18","author":"T Falk","year":"2009","unstructured":"Falk T, Chan WY (2009) Modulation spectral features for robust far-field speaker identification. IEEE Trans Audio Speech Lang Process 18(1):90\u2013100","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"4","key":"36_CR15","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak N et al (2011) Front-end factor analysis for speaker verification. IEEE Trans Audio Speech Lang Process 19(4):788\u2013798","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"36_CR16","doi-asserted-by":"crossref","unstructured":"Garcia-Romero D, Zhou X, Espy-Wilson CY (2012) Multicondition training of gaussian plda models in i-vector space for noise and reverberation robust speaker recognition. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), pp. 4257\u20134260. IEEE","DOI":"10.1109\/ICASSP.2012.6288859"},{"key":"36_CR17","unstructured":"Dehak N et al (2010) Cosine similarity scoring without score normalization techniques. In: Odyssey, pp.\u00a015"},{"issue":"4","key":"36_CR18","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1109\/TASL.2006.881693","volume":"15","author":"P Kenny","year":"2007","unstructured":"Kenny P et al (2007) Joint factor analysis versus eigenchannels in speaker recognition. IEEE Trans Audio Speech Lang Process 15(4):1435\u20131447","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"36_CR19","unstructured":"Kenny P (2005) Joint factor analysis of speaker and session variability: theory and algorithms. In: CRIM, Montreal,(Report) CRIM-06\/08-13, vol 14, pp 28\u201329"},{"issue":"6","key":"36_CR20","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1109\/MSP.2015.2462851","volume":"32","author":"JHL Hansen","year":"2015","unstructured":"Hansen JHL, Hasan T (2015) Speaker recognition by machines and humans: a tutorial review. IEEE Signal Process Mag 32(6):74\u201399","journal-title":"IEEE Signal Process Mag"},{"issue":"3","key":"36_CR21","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1109\/TSA.2004.840940","volume":"13","author":"P Kenny","year":"2005","unstructured":"Kenny P, Boulianne G, Dumouchel P (2005) Eigenvoice modeling with sparse training data. IEEE Trans Speech Audio Process 13(3):345\u2013354","journal-title":"IEEE Trans Speech Audio Process"},{"key":"36_CR22","doi-asserted-by":"crossref","unstructured":"Garcia-Romero D, Espy-Wilson CY (2011) Analysis of i-vector length normalization in speaker recognition systems. In: Twelfth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2011-53"},{"issue":"4","key":"36_CR23","first-page":"1","volume":"1","author":"SO Sadjadi","year":"2013","unstructured":"Sadjadi SO, Slaney M, Heck L (2013) Msr identity toolbox v1. 0: a matlab toolbox for speaker-recognition research. Speech Lang Process Tech Comm Newslett 1(4):1\u201332","journal-title":"Speech Lang Process Tech Comm Newslett"},{"key":"36_CR24","first-page":"1","volume":"270","author":"B Logan","year":"2000","unstructured":"Logan B et al (2000) Mel frequency cepstral coefficients for music modeling. Ismir 270:1\u201311","journal-title":"Ismir"},{"issue":"4","key":"36_CR25","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1109\/TIM.2009.2024697","volume":"59","author":"TH Falk","year":"2010","unstructured":"Falk TH, Chan WY (2010) Temporal dynamics for blind measurement of room acoustical parameters. IEEE Trans Instrum Meas 59(4):978\u2013989","journal-title":"IEEE Trans Instrum Meas"},{"issue":"1","key":"36_CR26","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/TASL.2009.2023679","volume":"18","author":"TH Falk","year":"2010","unstructured":"Falk TH, Chan WY (2010) Modulation spectral features for robust far-field speaker identification. IEEE Trans Audio Speech Lang Process 18(1):90\u2013100","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"36_CR27","first-page":"8","volume":"35","author":"M Slaney","year":"1993","unstructured":"Slaney M et al (1993) An efficient implementation of the patterson-holdsworth auditory filter bank. Apple Computer, Perception Group. Tech Rep 35:8","journal-title":"Tech Rep"},{"issue":"3","key":"36_CR28","doi-asserted-by":"publisher","first-page":"1181","DOI":"10.1121\/1.1288665","volume":"108","author":"SD Ewert","year":"2000","unstructured":"Ewert SD, Dau T (2000) Characterizing frequency selectivity for envelope fluctuations. J Acoust Soc Am 108(3):1181\u20131196","journal-title":"J Acoust Soc Am"},{"key":"36_CR29","unstructured":"Shum S et al (2010) Unsupervised speaker adaptation based on the cosine similarity for text-independent speaker verification. In: Odyssey, pp 16"},{"key":"36_CR30","first-page":"569","volume":"9","author":"LVM Laurens","year":"2008","unstructured":"Laurens LVM, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9:569","journal-title":"J Mach Learn Res"},{"issue":"7","key":"36_CR31","doi-asserted-by":"publisher","first-page":"1766","DOI":"10.1109\/TASL.2010.2052247","volume":"18","author":"TH Falk","year":"2010","unstructured":"Falk TH, Zheng C, Chan WY (2010) A non-intrusive quality and intelligibility measure of reverberant and dereverberated speech. IEEE Trans Audio Speech Lang Process 18(7):1766\u20131774","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"5","key":"36_CR32","doi-asserted-by":"publisher","first-page":"2303","DOI":"10.1121\/1.4744083","volume":"109","author":"T Halmrast","year":"2001","unstructured":"Halmrast T (2001) Sound coloration from (very) early reflections. J Acoust Soc Am 109(5):2303","journal-title":"J Acoust Soc Am"},{"issue":"3","key":"36_CR33","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1121\/1.380711","volume":"58","author":"WB Joyce","year":"1975","unstructured":"Joyce WB (1975) Sabine\u2019s reverberation time and ergodic auditoriums. J Acoust Soc Am 58(3):643\u2013655","journal-title":"J Acoust Soc Am"},{"key":"36_CR34","unstructured":"ITU-R Rec. Itu-r bs. 1534-1 (2003) Method for the subjective assessment of intermediate quality level of coding systems"},{"key":"36_CR35","doi-asserted-by":"crossref","unstructured":"Jin C, Kubichek R (1996) Vector quantization techniques for output-based objective speech quality. In: 1996 IEEE international conference on acoustics, speech, and signal processing conference proceedings, vol\u00a01, pp 491\u2013494. IEEE","DOI":"10.1109\/ICASSP.1996.541140"},{"issue":"7","key":"36_CR36","doi-asserted-by":"publisher","first-page":"1151","DOI":"10.1109\/TASLP.2019.2912123","volume":"27","author":"B Cauchi","year":"2019","unstructured":"Cauchi B et al (2019) Non-intrusive speech quality prediction using modulation energies and lstm network. IEEE\/ACM Trans Audio Speech Lang Process 27(7):1151\u20131163","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"36_CR37","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747"},{"key":"36_CR38","volume-title":"Noisy Speech Database for Training Speech Enhancement Algorithms and tts Models","author":"C Valentini-Botinhao","year":"2017","unstructured":"Valentini-Botinhao C et al (2017) Noisy Speech Database for Training Speech Enhancement Algorithms and tts Models. University of Edinburgh. School of Informatics, Centre for Speech Technology Research (CSTR), Edinburgh"},{"key":"36_CR39","doi-asserted-by":"crossref","unstructured":"Santos J, Falk TH (2019) Towards the development of a non-intrusive objective quality measure for dnn-enhanced speech. In: 2019 eleventh international conference on quality of multimedia experience (QoMEX), pp. 1\u20136. IEEE","DOI":"10.1109\/QoMEX.2019.8743156"},{"issue":"7\u20138","key":"36_CR40","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1016\/j.specom.2006.12.006","volume":"49","author":"Y Hu","year":"2007","unstructured":"Hu Y, Loizou PC (2007) Subjective comparison and evaluation of speech enhancement algorithms. Speech Commun 49(7\u20138):588\u2013601","journal-title":"Speech Commun"},{"key":"36_CR41","doi-asserted-by":"crossref","unstructured":"Pascual S, Bonafonte A, Serr\u00e0 J (2017) Segan: Speech enhancement generative adversarial network. arXiv preprint arXiv:1703.09452","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"36_CR42","doi-asserted-by":"crossref","unstructured":"Veaux C, Yamagishi J, King S (2013) The voice bank corpus: Design, collection and data analysis of a large regional accent speech database. In: 2013 international conference oriental COCOSDA held jointly with 2013 conference on Asian spoken language research and evaluation (O-COCOSDA\/CASLRE), pp. 1\u20134. IEEE","DOI":"10.1109\/ICSDA.2013.6709856"},{"issue":"3","key":"36_CR43","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A Varga","year":"1993","unstructured":"Varga A, Steeneken HJM (1993) Asessment for automatic speech recognition: Ii. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun 12(3):247\u2013251","journal-title":"Speech Commun"},{"issue":"6","key":"36_CR44","doi-asserted-by":"publisher","first-page":"1429","DOI":"10.1109\/TASL.2009.2035038","volume":"18","author":"EA Lehmann","year":"2009","unstructured":"Lehmann EA, Johansson AM (2009) Diffuse reverberation model for efficient image-source simulation of room impulse responses. IEEE Trans Audio Speech Lang Process 18(6):1429\u20131439","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"36_CR45","doi-asserted-by":"crossref","unstructured":"Hirsch H, Pearce D (2000) The aurora experimental framework for the performance evaluation of speech recognition systems under noisy conditions. In: ASR2000\u2013automatic speech recognition: challenges for the new Millennium ISCA tutorial and research workshop (ITRW)","DOI":"10.21437\/ICSLP.2000-743"},{"key":"36_CR46","unstructured":"Rix AW (2003) Comparison between subjective listening quality and p. 862 pesq score. In: Proceedings measurement of speech and audio quality in networks (MESAQIN03), Prague, Czech Republic"},{"issue":"24","key":"36_CR47","first-page":"171","volume":"24","author":"MV Shcherbakov","year":"2013","unstructured":"Shcherbakov MV et al (2013) A survey of forecast error measures. World Appl Sci J 24(24):171\u2013176","journal-title":"World Appl Sci J"},{"issue":"7","key":"36_CR48","doi-asserted-by":"publisher","first-page":"1236","DOI":"10.1109\/TASLP.2018.2821899","volume":"26","author":"JF Santos","year":"2018","unstructured":"Santos JF, Falk TH (2018) Speech dereverberation with context-aware recurrent neural networks. IEEE\/ACM Trans Audio Speech Lang Process 26(7):1236\u20131246","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"issue":"7","key":"36_CR49","doi-asserted-by":"publisher","first-page":"1492","DOI":"10.1109\/TASLP.2017.2696307","volume":"25","author":"DS Williamson","year":"2017","unstructured":"Williamson DS, Wang D (2017) Time-frequency masking in the complex domain for speech dereverberation and denoising. IEEE\/ACM Trans Audio Speech Lang Process 25(7):1492\u20131501","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"issue":"1","key":"36_CR50","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1109\/TASLP.2016.2623559","volume":"25","author":"B Wu","year":"2016","unstructured":"Wu B et al (2016) A reverberation-time-aware approach to speech dereverberation based on deep neural networks. IEEE\/ACM Trans Audio Speech Lang Process 25(1):102\u2013111","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"issue":"1","key":"36_CR51","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1109\/TSA.2003.819949","volume":"12","author":"Y Hu","year":"2004","unstructured":"Hu Y, Loizou PC (2004) Speech enhancement based on wavelet thresholding the multitaper spectrum. IEEE Trans Speech Audio Process 12(1):59\u201367","journal-title":"IEEE Trans Speech Audio Process"},{"issue":"6","key":"36_CR52","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/89.641296","volume":"5","author":"DE Tsoukalas","year":"1997","unstructured":"Tsoukalas DE, Mourjopoulos JN, Kokkinakis G (1997) Speech enhancement based on audible noise suppression. IEEE Trans Speech Audio Process 5(6):497\u2013514","journal-title":"IEEE Trans Speech Audio Process"}],"container-title":["Quality and User Experience"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41233-020-00036-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41233-020-00036-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41233-020-00036-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T01:34:53Z","timestamp":1723080893000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41233-020-00036-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,20]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["36"],"URL":"https:\/\/doi.org\/10.1007\/s41233-020-00036-z","relation":{},"ISSN":["2366-0139","2366-0147"],"issn-type":[{"type":"print","value":"2366-0139"},{"type":"electronic","value":"2366-0147"}],"subject":[],"published":{"date-parts":[[2020,6,20]]},"assertion":[{"value":"7 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2020","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"6"}}