{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T08:56:26Z","timestamp":1726476986147},"reference-count":60,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1016\/j.eswa.2021.116469","type":"journal-article","created":{"date-parts":[[2022,1,5]],"date-time":"2022-01-05T16:57:20Z","timestamp":1641401840000},"page":"116469","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":25,"special_numbering":"C","title":["Emotional speaker identification using a novel capsule nets model"],"prefix":"10.1016","volume":"193","author":[{"given":"Ali Bou","family":"Nassif","sequence":"first","affiliation":[]},{"given":"Ismail","family":"Shahin","sequence":"additional","affiliation":[]},{"given":"Ashraf","family":"Elnagar","sequence":"additional","affiliation":[]},{"given":"Divya","family":"Velayudhan","sequence":"additional","affiliation":[]},{"given":"Adi","family":"Alhudhaif","sequence":"additional","affiliation":[]},{"given":"Kemal","family":"Polat","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2021.116469_b0005","unstructured":"Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., Devin, M., Ghemawat, S., Irving, G., Isard, M., Kudlur, M., Levenberg, J., Monga, R., Moore, S., Murray, D. G., Steiner, B., Tucker, P., Vasudevan, V., Warden, P., \u2026 Zheng, X. (2016). TensorFlow: A system for large-scale machine learning. Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2016."},{"key":"10.1016\/j.eswa.2021.116469_b0010","doi-asserted-by":"crossref","DOI":"10.1007\/s00521-016-2501-7","article-title":"Speaker recognition with hybrid features from a deep belief network","author":"Ali","year":"2018","journal-title":"Neural Computing and Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0015","article-title":"A study on text-independent speaker recognition systems in emotional conditions using different pattern recognition models","author":"Alluri","year":"2017","journal-title":"Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)"},{"issue":"2","key":"10.1016\/j.eswa.2021.116469_b0020","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1016\/j.ipm.2017.07.002","article-title":"Improved Arabic speech recognition system through the automatic generation of fine-grained phonetic transcriptions","volume":"56","author":"Alsharhan","year":"2019","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.eswa.2021.116469_b0025","unstructured":"End-to-end speech command recognition with capsule network, 2018-Septe (2018) (testimony of Jaesung Bae & Dae Shik Kim)."},{"key":"10.1016\/j.eswa.2021.116469_b0030","series-title":"Proceedings of the Annual Conference of the International Speech Communication Association","article-title":"Emotion attribute projection for speaker recognition on emotional speech","author":"Bao","year":"2007"},{"issue":"1","key":"10.1016\/j.eswa.2021.116469_b0035","doi-asserted-by":"crossref","DOI":"10.1186\/s13636-018-0133-9","article-title":"Robust emotional speech recognition based on binaural model and emotional auditory mask in noisy environments","volume":"2018","author":"Bashirpour","year":"2018","journal-title":"Eurasip Journal on Audio, Speech, and Music Processing"},{"key":"10.1016\/j.eswa.2021.116469_b0040","doi-asserted-by":"crossref","DOI":"10.18178\/ijmlc.2019.9.2.778","article-title":"Text-independent speaker identification using deep learning model of convolution neural network","author":"Bunrit","year":"2019","journal-title":"International Journal of Machine Learning and Computing. https:\/\/"},{"issue":"4","key":"10.1016\/j.eswa.2021.116469_b0045","doi-asserted-by":"crossref","first-page":"807","DOI":"10.1016\/j.ipm.2013.01.005","article-title":"Leveraging relevance cues for language modeling in speech recognition","volume":"49","author":"Chen","year":"2013","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.eswa.2021.116469_b0050","doi-asserted-by":"crossref","unstructured":"Chen, L., & Yang, Y. (2013). A preliminary study on GMM weight transformation for emotional speaker recognition. Proceedings - 2013 Humaine Association Conference on Affective Computing and Intelligent Interaction, ACII 2013. https:\/\/doi.org\/10.1109\/ACII.2013.12.","DOI":"10.1109\/ACII.2013.12"},{"key":"10.1016\/j.eswa.2021.116469_b0055","doi-asserted-by":"crossref","unstructured":"De Pinto, M. G., Polignano, M., Lops, P., & Semeraro, G. (2020). Emotions Understanding Model from Spoken Language using Deep Neural Networks and Mel-Frequency Cepstral Coefficients. IEEE Conference on Evolving and Adaptive Intelligent Systems, 2020-May. https:\/\/doi.org\/10.1109\/EAIS48028.2020.9122698.","DOI":"10.1109\/EAIS48028.2020.9122698"},{"key":"10.1016\/j.eswa.2021.116469_b0060","series-title":"1st International Conference on Artificial Intelligence in Information and Communication","article-title":"A New Unsupervised Short-Utterance based Speaker Identification Approach with Parametric t-SNE Dimensionality Reduction","author":"Elnaggar","year":"2019"},{"issue":"1","key":"10.1016\/j.eswa.2021.116469_b0065","first-page":"194","volume":"2","author":"Farrell","year":"1994","journal-title":"Speaker recognition using neural networks and conventional classifiers."},{"issue":"4","key":"10.1016\/j.eswa.2021.116469_b0070","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1016\/j.neunet.2005.03.006","article-title":"Emotion recognition in human-computer interaction","volume":"18","author":"Fragopanagos","year":"2005","journal-title":"Neural Networks"},{"key":"10.1016\/j.eswa.2021.116469_b0075","article-title":"A study of the effect of emotional state upon text-independent speaker identification","author":"Ghiurcau","year":"2011","journal-title":"ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings."},{"key":"10.1016\/j.eswa.2021.116469_b0080","series-title":"Proceedings of the Annual Conference of the International Speech Communication Association","article-title":"Speaker recognition using temporal contours in linguistic units: The case of formant and formant-bandwidth trajectories","author":"Gonzalez-Rodriguez","year":"2011"},{"key":"10.1016\/j.eswa.2021.116469_b0085","doi-asserted-by":"crossref","DOI":"10.21437\/Eurospeech.1997-494","article-title":"Getting Started with SUSAS: A Speech Under Simulated and Actual Stress Database","author":"Hansen","year":"1997","journal-title":"Eurospeech."},{"key":"10.1016\/j.eswa.2021.116469_b0090","series-title":"Speech under stress: Analysis, modeling and recognition","first-page":"108","author":"Hansen","year":"2007"},{"issue":"PART 1","key":"10.1016\/j.eswa.2021.116469_b0095","first-page":"44","article-title":"Transforming auto-encoders","volume":"6791 LNCS","author":"Hinton","year":"2011","journal-title":"Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)"},{"key":"10.1016\/j.eswa.2021.116469_b0100","unstructured":"J., L., I.Tashev, Lee, J., & Tashev, I. (2015). High-level feature representation using recurrent neural network for speech emotion recognition."},{"key":"10.1016\/j.eswa.2021.116469_b0105","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.114591","article-title":"Speaker identification through artificial intelligence techniques: A comprehensive review and research challenges","volume":"171","author":"Jahangir","year":"2021","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0110","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-27552-4_77","article-title":"Text-independent speaker identification in emotional environments: A classifier fusion approach","author":"Jawarkar","year":"2012","journal-title":"Advances in Intelligent and Soft Computing"},{"key":"10.1016\/j.eswa.2021.116469_b0115","series-title":"3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2015"},{"issue":"1145\/2381716","key":"10.1016\/j.eswa.2021.116469_b0120","first-page":"2381739","article-title":"Speaker recognition in the case of emotional environment using transformation of speech features","volume":"10","author":"Koolagudi","year":"2012","journal-title":"ACM International Conference Proceeding Series. doi"},{"key":"10.1016\/j.eswa.2021.116469_b0125","article-title":"Capsule Networks \u2013 A survey","author":"Kwabena Patrick","year":"2019","journal-title":"Journal of King Saud University - Computer and Information Sciences, xxxx."},{"key":"10.1016\/j.eswa.2021.116469_b0130","series-title":"Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)","article-title":"Emotion-state conversion for speaker recognition","author":"Li","year":"2005"},{"key":"10.1016\/j.eswa.2021.116469_b0135","doi-asserted-by":"crossref","DOI":"10.1109\/TASLP.2016.2544660","article-title":"Improving Short Utterance Speaker Recognition by Modeling Speech Unit Classes","author":"Li","year":"2016","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"year":"2013","series-title":"Network In Network (paper)","author":"Lin","key":"10.1016\/j.eswa.2021.116469_b0140"},{"key":"10.1016\/j.eswa.2021.116469_b0145","article-title":"GMM and CNN Hybrid Method for Short Utterance Speaker Recognition","author":"Liu","year":"2018","journal-title":"IEEE Transactions on Industrial Informatics"},{"issue":"5","key":"10.1016\/j.eswa.2021.116469_b0150","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0196391","article-title":"The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English","volume":"13","author":"Livingstone","year":"2018","journal-title":"PloS One"},{"key":"10.1016\/j.eswa.2021.116469_b0155","series-title":"IEEE International Workshop ON Machine Learning For Signal Processing","article-title":"Speaker identification and clustering using convolutional neural networks","author":"Lukic","year":"2016"},{"key":"10.1016\/j.eswa.2021.116469_b0160","series-title":"Proceedings of 25th International Conference Radioelektronika","article-title":"Best feature selection for emotional speaker verification in i-vector representation","author":"Mackova","year":"2015"},{"issue":"1","key":"10.1016\/j.eswa.2021.116469_b0165","first-page":"93","article-title":"Speaker recognition from emotional speech using I-vector approach","volume":"7","author":"Mackov\u00e1","year":"2014","journal-title":"Journal of Electrical and Electronics Engineering"},{"key":"10.1016\/j.eswa.2021.116469_b0170","doi-asserted-by":"crossref","DOI":"10.1007\/s11042-018-6256-2","article-title":"Emotional speaker recognition in real life conditions using multiple descriptors and i-vector speaker modeling technique","author":"Mansour","year":"2019","journal-title":"Multimedia Tools and Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0175","doi-asserted-by":"crossref","first-page":"60070","DOI":"10.1109\/ACCESS.2020.2983029","article-title":"Speaker Identification in Different Emotional States in Arabic and English","volume":"8","author":"Meftah","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2021.116469_b0180","doi-asserted-by":"crossref","first-page":"19143","DOI":"10.1109\/ACCESS.2019.2896880","article-title":"Speech Recognition Using Deep Neural Networks: A Systematic Review","volume":"7","author":"Nassif","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2021.116469_b0185","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.asoc.2021.107141","article-title":"CASA-Based Speaker Identification Using Cascaded GMM-CNN Classifier in Noisy and Emotional Talking Conditions","volume":"103","author":"Nassif","year":"2021","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.eswa.2021.116469_b0190","article-title":"X-Vectors Meet Emotions: A Study on Dependencies between Emotion and Speaker Recognition","author":"Pappagari","year":"2020","journal-title":"ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings."},{"key":"10.1016\/j.eswa.2021.116469_b0195","series-title":"2017 7th International Conference on Affective Computing and Intelligent Interaction","article-title":"Predicting speaker recognition reliability by considering emotional content","author":"Parthasarathy","year":"2017"},{"key":"10.1016\/j.eswa.2021.116469_b0200","article-title":"A study of speaker verification performance with expressive speech","volume":"5540\u20135544","author":"Parthasarathy","year":"2017","journal-title":"ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings"},{"key":"10.1016\/j.eswa.2021.116469_b0205","series-title":"Proceedings of the 8th HCI International on Human-Computer Interaction: Ergonomics and User Interfaces","article-title":"Affective Computing for HCI","author":"Picard","year":"1999"},{"key":"10.1016\/j.eswa.2021.116469_b0210","doi-asserted-by":"crossref","unstructured":"Praveen Kumar, P. S., Thimmaraja Yadava, G., & Jayanna, H. S. (2018). Text Independent Speaker Identification: A Review. 2017 2nd International Conference On Emerging Computation and Information Technologies, ICECIT 2017. https:\/\/doi.org\/10.1109\/ICECIT.2017.8453360.","DOI":"10.1109\/ICECIT.2017.8453360"},{"key":"10.1016\/j.eswa.2021.116469_b0215","unstructured":"Sabour, S., Frosst, N., & Hinton, G. E. (2017). Dynamic routing between capsules. Advances in Neural Information Processing Systems, 2017-Decem, 3857\u20133867."},{"key":"10.1016\/j.eswa.2021.116469_b0220","doi-asserted-by":"crossref","DOI":"10.1109\/TIE.2007.891647","article-title":"A speech-and-speaker identification system: Feature extraction, description, and classification of speech-signal image","author":"Saeed","year":"2007","journal-title":"IEEE Transactions on Industrial Electronics"},{"key":"10.1016\/j.eswa.2021.116469_b0225","unstructured":"Salehghaffari, H. (2018). Speaker Verification using Convolutional Neural Networks. ArXiv, abs\/1803.0."},{"key":"10.1016\/j.eswa.2021.116469_b0230","doi-asserted-by":"crossref","unstructured":"Scherer, K. R., Johnstone, T., Klasmeyer, G., & B\u00e4nziger, T. (2000). Can automatic speaker verification be improved by training the algorithms on emotional speech? 6th International Conference on Spoken Language Processing, ICSLP 2000.","DOI":"10.21437\/ICSLP.2000-392"},{"key":"10.1016\/j.eswa.2021.116469_b0235","series-title":"2020 2nd International Multidisciplinary Information Technology and Engineering Conference (IMITEC)","first-page":"1","article-title":"Emotional Speaker Recognition based on Machine and Deep Learning","author":"Sefara","year":"2020"},{"issue":"3","key":"10.1016\/j.eswa.2021.116469_b0240","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10772-010-9075-z","article-title":"Speaker recognition under stressed condition","volume":"13","author":"Senthil Raja","year":"2010","journal-title":"International Journal of Speech Technology"},{"key":"10.1016\/j.eswa.2021.116469_b0245","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.116080","article-title":"Novel dual-channel long short-term memory compressed capsule networks for emotion recognition","volume":"188","author":"Shahin","year":"2022","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0250","series-title":"2019 International Conference on Electrical and Computing Technologies and Applications","article-title":"Speaker Verification in Emotional Talking Environments based on Third-Order Circular Suprasegmental Hidden Markov Model","author":"Shahin","year":"2019"},{"key":"10.1016\/j.eswa.2021.116469_b0255","article-title":"Novel cascaded Gaussian mixture model-deep neural network classifier for speaker identification in emotional talking environments","volume":"1\u201313","author":"Shahin","year":"2018","journal-title":"Neural Computing and Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0260","doi-asserted-by":"crossref","first-page":"26777","DOI":"10.1109\/ACCESS.2019.2901352","article-title":"Emotion Recognition Using Hybrid Gaussian Mixture Model and Deep Neural Network","volume":"7","author":"Shahin","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2021.116469_b0265","doi-asserted-by":"crossref","DOI":"10.1007\/s00521-021-06226-w","article-title":"Novel hybrid DNN approaches for speaker verification in emotional and stressful talking environments","author":"Shahin","year":"2021","journal-title":"Neural Computing and Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0270","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2017.08.015","article-title":"Speaker identification features extraction methods: A systematic review","author":"Tirumala","year":"2017","journal-title":"In Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2021.116469_b0275","series-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4052","article-title":"Deep neural networks for small footprint text-dependent speaker verification","author":"Variani","year":"2014"},{"key":"10.1016\/j.eswa.2021.116469_b0280","doi-asserted-by":"crossref","DOI":"10.1016\/j.specom.2010.08.013","article-title":"Automatic speech emotion recognition using modulation spectral features","author":"Wu","year":"2011","journal-title":"Speech Communication"},{"key":"10.1016\/j.eswa.2021.116469_b0285","article-title":"Improving speaker recognition by training on emotion-added models","author":"Wu","year":"2005","journal-title":"Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)"},{"key":"10.1016\/j.eswa.2021.116469_b0290","series-title":"INTERSPEECH 2006 and 9th International Conference on Spoken Language Processing","article-title":"Study on speaker verification on emotional speech","author":"Wu","year":"2006"},{"key":"10.1016\/j.eswa.2021.116469_b0295","article-title":"Speech Emotion Recognition Using Capsule Networks","author":"Wu","year":"2019","journal-title":"ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings."},{"key":"10.1016\/j.eswa.2021.116469_b0300","article-title":"Rules based feature modification for affective speaker recognition","volume":"1","author":"Zhaohui","year":"2006","journal-title":"ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417421017498?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417421017498?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T20:00:56Z","timestamp":1708459256000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417421017498"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5]]},"references-count":60,"alternative-id":["S0957417421017498"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2021.116469","relation":{},"ISSN":["0957-4174"],"issn-type":[{"type":"print","value":"0957-4174"}],"subject":[],"published":{"date-parts":[[2022,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Emotional speaker identification using a novel capsule nets model","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2021.116469","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2021 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"116469"}}