{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,23]],"date-time":"2024-07-23T00:20:45Z","timestamp":1721694045962},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T00:00:00Z","timestamp":1716508800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T00:00:00Z","timestamp":1716508800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s10772-024-10104-w","type":"journal-article","created":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T05:01:42Z","timestamp":1716526902000},"page":"309-317","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["VAD system under uncontrolled environment: A solution for strengthening the noise robustness using MMSE-SPZC"],"prefix":"10.1007","volume":"27","author":[{"given":"B. G.","family":"Nagaraja","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3266-9732","authenticated-orcid":false,"given":"G. Thimmaraja","family":"Yadava","sequence":"additional","affiliation":[]},{"given":"Prashanth","family":"Kabballi","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Patil","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,5,24]]},"reference":[{"issue":"5","key":"10104_CR1","doi-asserted-by":"publisher","first-page":"783","DOI":"10.1080\/02533839.2006.9671177","volume":"29","author":"SS Ahn","year":"2006","unstructured":"Ahn, S. S., & Lee, Y. C. (2006). An improved statistical model-based VAD algorithm with an adaptive threshold. Journal of the Chinese Institute of Engineers, 29(5), 783\u2013789.","journal-title":"Journal of the Chinese Institute of Engineers"},{"key":"10104_CR2","unstructured":"Anees, M., Nagaraja, B. G., & Yadava, T. G. (2023). Speech coding techniques and challenges: A comprehensive literature survey. Multimedia Tools and Applications, 1\u201321."},{"issue":"6","key":"10104_CR3","doi-asserted-by":"publisher","first-page":"1965","DOI":"10.1109\/TSP.2006.874403","volume":"54","author":"JH Chang","year":"2006","unstructured":"Chang, J. H., Kim, N. S., & Mitra, S. K. (2006). Voice activity detection based on multiple statistical models. IEEE Transactions on Signal Processing, 54(6), 1965\u20131976.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"5","key":"10104_CR4","doi-asserted-by":"publisher","first-page":"EL405","DOI":"10.1121\/1.4800189","volume":"133","author":"F Chen","year":"2013","unstructured":"Chen, F., & Hu, Y. (2013). Modifying the normalized covariance metric measure to account for nonlinear distortions introduced by noise-reduction algorithms. The Journal of the Acoustical Society of America, 133(5), EL405\u2013EL411.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"9","key":"10104_CR5","doi-asserted-by":"publisher","DOI":"10.1088\/0266-5611\/29\/9\/095017","volume":"29","author":"M Dashti","year":"2013","unstructured":"Dashti, M., Law, K. J., Stuart, A. M., & Voss, J. (2013). MAP estimators and their consistency in Bayesian nonparametric inverse problems. Inverse Problems, 29(9), 095017.","journal-title":"Inverse Problems"},{"key":"10104_CR6","doi-asserted-by":"crossref","unstructured":"Ding, S., Rikhye, R., Liang, Q., He, Y., Wang, Q., Narayanan, A., O\u2019Malley, T., & McGraw, I. (2022). Personal VAD 2.0: Optimizing personal voice activity detection for on-device speech recognition, arXiv preprint arXiv:2204.03793.","DOI":"10.21437\/Interspeech.2022-856"},{"issue":"3","key":"10104_CR7","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TASL.2010.2052803","volume":"19","author":"PK Ghosh","year":"2010","unstructured":"Ghosh, P. K., Tsiartas, A., & Narayanan, S. (2010). Robust voice activity detection using long-term signal variability. IEEE Transactions on Audio, Speech, and Language Processing, 19(3), 600\u2013613.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10104_CR8","doi-asserted-by":"crossref","unstructured":"Hendriks, R. C., Heusdens, R., & Jensen, J. (2010). MMSE based noise PSD tracking with low complexity. In IEEE international conference on acoustics, speech and signal processing (pp. 4266\u20134269).","DOI":"10.1109\/ICASSP.2010.5495680"},{"issue":"4","key":"10104_CR9","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky, H., & Morgan, N. (1994). RASTA processing of speech. IEEE Transactions on Speech and Audio Processing, 2(4), 578\u2013589.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"1","key":"10104_CR10","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1109\/TASL.2007.911054","volume":"16","author":"Y Hu","year":"2008","unstructured":"Hu, Y., & Loizou, P. (2008). Evaluation of objective quality measures for speech enhancement. IEEE Transactions on Speech and Audio Processing, 16(1), 229\u2013238.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"1","key":"10104_CR11","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1109\/TASL.2007.911054","volume":"16","author":"Y Hu","year":"2008","unstructured":"Hu, Y., & Loizou, P. C. (2008). Evaluation of objective quality measures for speech enhancement. IEEE Transactions on Audio, Speech, and Language Processing, 16(1), 229\u2013238.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1\u20132","key":"10104_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1504\/IJSISE.2020.113552","volume":"12","author":"SJ Jainar","year":"2020","unstructured":"Jainar, S. J., Sale, P. L., & Nagaraja, B. G. (2020). VAD, feature extraction and modelling techniques for speaker recognition: A review. International Journal of Signal and Imaging Systems Engineering, 12(1\u20132), 1\u201318.","journal-title":"International Journal of Signal and Imaging Systems Engineering"},{"key":"10104_CR13","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/978-981-16-8129-5_10","volume-title":"International conference on robotics, vision, signal processing and power applications: enhancing research and innovation through the fourth industrial revolution","author":"R Jaiswal","year":"2022","unstructured":"Jaiswal, R. (2022). Performance analysis of voice activity detector in presence of non-stationary noise. In\u00a0International conference on robotics, vision, signal processing and power applications: Enhancing research and innovation through the fourth industrial revolution (pp. 59\u201365). Springer."},{"key":"10104_CR14","doi-asserted-by":"publisher","unstructured":"Kinnunen, T., & Rajan, P. (2013). A practical, self-adaptive voice activity detector for speaker verification with noisy telephone and microphone data. In IEEE international conference on acoustics, speech and signal processing (pp. 7229\u20137233). https:\/\/doi.org\/10.1109\/NCC.2013.6488011","DOI":"10.1109\/NCC.2013.6488011"},{"key":"10104_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2022.103520","volume":"74","author":"Y Korkmaz","year":"2022","unstructured":"Korkmaz, Y., & Boyac\u0131, A. (2022). milVAD: A bag-level MNIST modelling of voice activity detection using deep multiple instance learning. Biomedical Signal Processing and Control, 74, 103520.","journal-title":"Biomedical Signal Processing and Control"},{"key":"10104_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2022.104408","volume":"80","author":"Y Korkmaz","year":"2023","unstructured":"Korkmaz, Y., & Boyac\u0131, A. (2023). Hybrid voice activity detection system based on LSTM and auditory speech features. Biomedical Signal Processing and Control, 80, 104408.","journal-title":"Biomedical Signal Processing and Control"},{"issue":"5","key":"10104_CR17","doi-asserted-by":"publisher","first-page":"3387","DOI":"10.1121\/1.3097493","volume":"125","author":"J Ma","year":"2009","unstructured":"Ma, J., Hu, Y., & Loizou, P. (2009). Objective measures for predicting speech intelligibility in noisy conditions based on new band-importance functions. Journal of the Acoustical Society of America, 125(5), 3387\u20133405.","journal-title":"Journal of the Acoustical Society of America"},{"key":"10104_CR18","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/s10772-018-9525-6","volume":"21","author":"H Mukherjee","year":"2018","unstructured":"Mukherjee, H., Obaidullah, S. M., Santosh, K. C., Phadikar, S., & Roy, K. (2018). Line spectral frequency-based features and extreme learning machine for voice activity detection from audio signal. International Journal of Speech Technology, 21, 753\u2013760.","journal-title":"International Journal of Speech Technology"},{"issue":"9","key":"10104_CR19","doi-asserted-by":"publisher","first-page":"14","DOI":"10.5815\/ijigsp.2013.09.03","volume":"5","author":"BG Nagaraja","year":"2013","unstructured":"Nagaraja, B. G., & Jayanna, H. S. (2013). Kannada language parameters for speaker identification with the constraint of limited data. International Journal of Image, Graphics and Signal Processing, 5(9), 14.","journal-title":"International Journal of Image, Graphics and Signal Processing"},{"issue":"2","key":"10104_CR20","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1504\/IJSISE.2016.075000","volume":"9","author":"BG Nagaraja","year":"2016","unstructured":"Nagaraja, B. G., & Jayanna, H. S. (2016). Feature extraction and modelling techniques for multilingual speaker recognition: A review. International Journal of Signal and Imaging Systems Engineering, 9(2), 67\u201378.","journal-title":"International Journal of Signal and Imaging Systems Engineering"},{"key":"10104_CR21","unstructured":"Osawa, K., Swaroop, S., Khan, M. E. E., Jain, A., Eschenhagen, R., Turner, R. E., & Yokota, R. (2019) Practical deep learning with Bayesian principles. Advances in Neural Information Processing Systems, 32."},{"key":"10104_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2023.105704","volume":"89","author":"Y Pan","year":"2024","unstructured":"Pan, Y., Shang, Y., Wang, W., Shao, Z., Han, Z., Liu, T., Guo, G., & Ding, H. (2024). Multi-feature deep supervised voiceprint adversarial network for depression recognition from speech. Biomedical Signal Processing and Control, 89, 105704.","journal-title":"Biomedical Signal Processing and Control"},{"issue":"3","key":"10104_CR23","doi-asserted-by":"publisher","first-page":"1065","DOI":"10.1214\/aoms\/1177704472","volume":"33","author":"E Parzen","year":"1962","unstructured":"Parzen, E. (1962). On estimation of a probability density function and mode. The Annals of Mathematical Statistics, 33(3), 1065\u20131076.","journal-title":"The Annals of Mathematical Statistics"},{"key":"10104_CR24","doi-asserted-by":"crossref","unstructured":"Rho, D., Park, J., & Ko, J. H. (2022). NAS-VAD: Neural architecture search for voice activity detection. arXiv preprint arXiv:2201.09032.","DOI":"10.21437\/Interspeech.2022-975"},{"key":"10104_CR25","first-page":"749","volume":"2","author":"AW Rix","year":"2001","unstructured":"Rix, A. W., Beerends, J. G., Hollier, M. P., & Hekstra, A. P. (2001). Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs. IEEE International Conference on Acoustics, Speech, and Signal Processing, 2, 749\u2013752.","journal-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing"},{"key":"10104_CR26","doi-asserted-by":"publisher","unstructured":"Shahnawazuddin, S., Thotappa, D., Dey, A., Imani, S., Prasanna, S. R. M., & Sinha, R. (2016). Improvements in IITG Assamese spoken query system: Background noise suppression and alternate acoustic modeling. Journal of Signal Processing Systems. Advanced online publication. https:\/\/doi.org\/10.1007\/s11265-016-1133-6","DOI":"10.1007\/s11265-016-1133-6"},{"key":"10104_CR27","doi-asserted-by":"crossref","unstructured":"Shahnawazuddin, S., Thotappa, D., Sarma, B. D., Deka, A., Prasanna, S. R. M., & Sinha, R. (2013). Assamese spoken query system to access the price of agricultural commodities. National Conference on Communications, 1\u20135.","DOI":"10.1109\/NCC.2013.6488011"},{"key":"10104_CR28","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1016\/j.csl.2017.07.005","volume":"47","author":"A Sholokhov","year":"2018","unstructured":"Sholokhov, A., Sahidullah, M., & Kinnunen, T. (2018). Semi-supervised speech activity detection with an application to automatic speaker verification. Computer Speech & Language, 47, 132\u2013156.","journal-title":"Computer Speech & Language"},{"key":"10104_CR29","doi-asserted-by":"crossref","unstructured":"Tan, X., & Zhang, X. L. (2021). Speech enhancement aided end-to-end multi-task learning for voice activity detection. In IEEE international conference on acoustics, speech and signal processing (pp. 6823\u20136827).","DOI":"10.1109\/ICASSP39728.2021.9414445"},{"key":"10104_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2019.06.005","volume":"59","author":"ZH Tan","year":"2020","unstructured":"Tan, Z. H., Sarkar, A. K., & Dehak, N. (2020). rVAD: An unsupervised segment-based robust voice activity detection method. Computer Speech & Language, 59, 1\u201321.","journal-title":"Computer Speech & Language"},{"key":"10104_CR31","doi-asserted-by":"crossref","unstructured":"Yadava, T. G., Nagaraja, B. G., & Jayanna, H. S. (2022). Performance evaluation of spectral subtraction with VAD and time-frequency filtering for speech enhancement. In Emerging research in computing, information, communication and applications (pp. 407\u2013414).","DOI":"10.1007\/978-981-19-5482-5_35"},{"issue":"3","key":"10104_CR32","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1007\/s10772-018-9506-9","volume":"22","author":"TG Yadava","year":"2018","unstructured":"Yadava, T. G., & Jayanna, H. S. (2018). Speech enhancement by combining spectral subtraction and minimum mean square error-spectrum power estimator based on zero crossing. International Journal of Speech Technology, 22(3), 639\u2013648.","journal-title":"International Journal of Speech Technology"},{"key":"10104_CR33","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/s10772-020-09671-5","volume":"23","author":"TG Yadava","year":"2020","unstructured":"Yadava, T. G., & Jayanna, H. S. (2020). Enhancements in automatic Kannada speech recognition system by background noise elimination and alternate acoustic modelling. International Journal of Speech Technology, 23, 149\u2013167.","journal-title":"International Journal of Speech Technology"},{"key":"10104_CR34","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/s10772-020-09786-9","volume":"24","author":"TG Yadava","year":"2021","unstructured":"Yadava, T. G., Nagaraja, B. G., & Jayanna, H. S. (2021). Speech enhancement and encoding by combining SS-VAD and LPC. International Journal of Speech Technology, 24, 165\u2013172.","journal-title":"International Journal of Speech Technology"},{"key":"10104_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, X. L., & Xu, M. (2022). AUC optimization for deep learning-based voice activity detection. EURASIP Journal on Audio, Speech, and Music Processing, 1\u201312.","DOI":"10.1186\/s13636-022-00260-9"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10104-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-024-10104-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10104-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T16:05:49Z","timestamp":1721664349000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-024-10104-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,24]]},"references-count":35,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["10104"],"URL":"https:\/\/doi.org\/10.1007\/s10772-024-10104-w","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,24]]},"assertion":[{"value":"27 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}