{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:28:57Z","timestamp":1715646537285},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s10772-024-10092-x","type":"journal-article","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T19:01:51Z","timestamp":1711738911000},"page":"211-223","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Speech recognition based on the transformer's multi-head attention in Arabic"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-3211-6633","authenticated-orcid":false,"given":"Omayma","family":"Mahmoudi","sequence":"first","affiliation":[]},{"given":"Mouncef","family":"Filali-Bouami","sequence":"additional","affiliation":[]},{"given":"Mohamed","family":"Benchat","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,29]]},"reference":[{"key":"10092_CR1","unstructured":"Benamer, L., & Alkishriwo, O. (2020). Database for Arabic speech commands recognition, In CEST the third conference for engineering sciences and technology, 1\u20133 December, Al Khums, Libya."},{"key":"10092_CR2","doi-asserted-by":"crossref","unstructured":"Berg, A., O'Connor, M., & Cruz, M. T. (2021). Keyword transformer: A self-attention model for keyword spotting.\u00a0arXiv preprint arXiv:2104.00769.","DOI":"10.21437\/Interspeech.2021-1286"},{"issue":"3","key":"10092_CR3","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1109\/72.286919","volume":"5","author":"WL Buntine","year":"1994","unstructured":"Buntine, W. L., & Weigend, A. S. (1994). Computing second derivatives in feed-forward networks: A review. IEEE Transactions on Neural Networks, 5(3), 480\u2013488.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"10092_CR4","doi-asserted-by":"crossref","unstructured":"Chen, K., Du, X., Zhu, B., Ma, Z., Berg-Kirkpatrick, T., & Dubnov, S. (2022). HTS-AT: A hierarchical token-semantic audio transformer for sound classification and detection. In 2022 IEEE international conference on acoustics, speech and signal processing (ICASSP 2022) (pp. 646\u2013650). IEEE.","DOI":"10.1109\/ICASSP43922.2022.9746312"},{"issue":"10","key":"10092_CR5","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1038\/nbt1004-1315","volume":"22","author":"SR Eddy","year":"2004","unstructured":"Eddy, S. R. (2004). What is a hidden Markov model? Nature Biotechnology, 22(10), 1315\u20131316.","journal-title":"Nature Biotechnology"},{"key":"10092_CR6","unstructured":"Falcon-Perez, R. (2022). Curriculum learning with audio domain data augmentation for sound event localization and detection. 
In Challenge of detection and classification of acoustic scenes and events."},{"key":"10092_CR7","doi-asserted-by":"publisher","unstructured":"Ferreira-Paiva, L., Alfaro-Espinoza, E., Almeida, V. M., Felix, L. B., & Neves, R. V. (2022). A survey of data augmentation for audio classification, In XXIV Brazilian congress of automatics (CBA). https:\/\/doi.org\/10.20906\/CBA2022\/3469.","DOI":"10.20906\/CBA2022\/3469"},{"key":"10092_CR8","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.4662481","author":"A Ghandoura","year":"2021","unstructured":"Ghandoura, A. (2021). Arabic speech commands dataset (v1.0). Zenodo. https:\/\/doi.org\/10.5281\/zenodo.4662481."},{"key":"10092_CR9","doi-asserted-by":"crossref","unstructured":"Gupta, J., Pathak, S., & Kumar, G. (2022). Deep learning (CNN) and transfer learning: A review. Journal of Physics: Conference Series 2273(1), 012029.","DOI":"10.1088\/1742-6596\/2273\/1\/012029"},{"key":"10092_CR10","unstructured":"Li, H., Chaudhari, P., Yang, H., Lam, M., Ravichandran, A., Bhotika, R., & Soatto, S. (2020). Rethinking the hyperparameters for fine-tuning.\u00a0arXiv preprint arXiv:2002.11770."},{"issue":"2","key":"10092_CR11","doi-asserted-by":"publisher","first-page":"69","DOI":"10.3390\/info13020069","volume":"13","author":"L Liao","year":"2022","unstructured":"Liao, L., Afedzie Kwofie, F., Chen, Z., Han, G., Wang, Y., Lin, Y., & Hu, D. (2022). A bidirectional context embedding transformer for automatic speech recognition. Information, 13(2), 69.","journal-title":"Information"},{"issue":"3","key":"10092_CR12","doi-asserted-by":"publisher","first-page":"467","DOI":"10.18280\/ria.360315","volume":"36","author":"O Mahmoudi","year":"2022","unstructured":"Mahmoudi, O., Bouami, M. F., & Badri, M. (2022). Arabic language modeling based on supervised machine learning. Revue d'Intelligence Artificielle, 36(3), 467.","journal-title":"Revue D'intelligence Artificielle"},{"key":"10092_CR13","doi-asserted-by":"crossref","unstructured":"Mahmoudi, O., & Bouami, M. F. (2023). RNN and LSTM models for Arabic speech commands recognition using PyTorch and GPU. In International conference on artificial intelligence & industrial applications (pp. 462\u2013470). Springer.","DOI":"10.1007\/978-3-031-43520-1_39"},{"key":"10092_CR14","doi-asserted-by":"crossref","unstructured":"Mahmoudi, O., & Bouami, M. F. (2023). Arabic speech commands recognition with LSTM & GRU models using CUDA toolkit implementation. In 2023 3rd international conference on innovative research in applied science, engineering and technology (IRASET) (pp. 1\u20134). IEEE.","DOI":"10.1109\/IRASET57153.2023.10152979"},{"key":"10092_CR15","doi-asserted-by":"crossref","unstructured":"Mahmoudi, O., & Bouami, M. F. (2023). Arabic speech emotion recognition using deep neural network. In International conference on digital technologies and applications (pp. 124\u2013133). Springer.","DOI":"10.1007\/978-3-031-29860-8_13"},{"key":"10092_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107389","volume":"167","author":"Z Mushtaq","year":"2020","unstructured":"Mushtaq, Z., & Su, S. F. (2020). Environmental sound classification using a regularized deep convolutional neural network with data augmentation. Applied Acoustics, 167, 107389.","journal-title":"Applied Acoustics"},{"key":"10092_CR17","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1016\/j.neucom.2021.03.091","volume":"452","author":"Z Niu","year":"2021","unstructured":"Niu, Z., Zhong, G., & Yu, H. (2021). 
A review on the attention mechanism of deep learning. Neurocomputing, 452, 48\u201362.","journal-title":"Neurocomputing"},{"key":"10092_CR18","first-page":"1","volume":"78","author":"M Obaid","year":"2023","unstructured":"Obaid, M., Hodrob, R., Abu Mwais, A., & Aldababsa, M. (2023). Small vocabulary isolated-word automatic speech recognition for single-word commands in Arabic spoken. Soft Computing, 78, 1\u201314.","journal-title":"Soft Computing"},{"issue":"5","key":"10092_CR19","doi-asserted-by":"publisher","first-page":"879","DOI":"10.1109\/JPROC.2008.917757","volume":"96","author":"JD Owens","year":"2008","unstructured":"Owens, J. D., Houston, M., Luebke, D., Green, S., Stone, J. E., & Phillips, J. C. (2008). GPU computing. Proceedings of the IEEE, 96(5), 879\u2013899.","journal-title":"Proceedings of the IEEE"},{"issue":"8","key":"10092_CR20","doi-asserted-by":"publisher","first-page":"2326","DOI":"10.3390\/s20082326","volume":"20","author":"A Pervaiz","year":"2020","unstructured":"Pervaiz, A., Hussain, F., Israr, H., Tahir, M. A., Raja, F. R., Baloch, N. K., & Zikria, Y. B. (2020). Incorporating noise robustness in speech command recognition by noise augmentation of training data. Sensors, 20(8), 2326.","journal-title":"Sensors"},{"issue":"3","key":"10092_CR21","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/LSP.2017.2657381","volume":"24","author":"J Salamon","year":"2017","unstructured":"Salamon, J., & Bello, J. P. (2017). Deep convolutional neural networks and data augmentation for environmental sound classification. IEEE Signal Processing Letters, 24(3), 279\u2013283.","journal-title":"IEEE Signal Processing Letters"},{"issue":"6","key":"10092_CR22","doi-asserted-by":"publisher","first-page":"1445","DOI":"10.1007\/s10278-022-00666-z","volume":"35","author":"M Usman","year":"2022","unstructured":"Usman, M., Zia, T., & Tariq, A. (2022). Analyzing transfer learning of vision transformers for interpreting chest radiography. Journal of Digital Imaging, 35(6), 1445\u20131462.","journal-title":"Journal of Digital Imaging"},{"key":"10092_CR23","doi-asserted-by":"publisher","first-page":"582","DOI":"10.1007\/BF02943243","volume":"16","author":"F Zheng","year":"2001","unstructured":"Zheng, F., Zhang, G., & Song, Z. (2001). Comparison of different implementations of MFCC. 
Journal of Computer Science and Technology, 16, 582\u2013589.","journal-title":"Journal of Computer Science and Technology"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10092-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-024-10092-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-024-10092-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T15:15:53Z","timestamp":1715613353000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-024-10092-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3]]},"references-count":23,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["10092"],"URL":"https:\/\/doi.org\/10.1007\/s10772-024-10092-x","relation":{"references":[{"id-type":"uri","id":"","asserted-by":"subject"}]},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3]]},"assertion":[{"value":"16 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"None. We hereby confirm that all the Figures and Tables in the manuscript are ours. Besides, the Figures and images, which are note ours, have been given permission for re-publication attached with the manuscript. The autors declare they have no financial interests. Our manuscript has associated data in a data repository.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}
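For programmatic use: the record above is the standard Crossref REST API response for this article's DOI, so it can be re-fetched from api.crossref.org rather than copied by hand. Below is a minimal Python sketch, assuming network access and the third-party requests package; every field name it reads (title, author, issued, reference, and so on) appears in the record itself, and the output format is just one illustrative choice.

# Minimal sketch: re-fetch this Crossref work record and print a citation
# plus the deposited reference list. Assumes the `requests` package is
# installed; Crossref also asks polite callers to put a contact email in
# the User-Agent header (omitted here for brevity).
import requests

DOI = "10.1007/s10772-024-10092-x"
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # same structure as the record shown above

# Join author names, e.g. "Omayma Mahmoudi, Mouncef Filali-Bouami, ..."
authors = ", ".join(
    f"{a.get('given', '')} {a.get('family', '')}".strip()
    for a in work.get("author", [])
)
year = work["issued"]["date-parts"][0][0]
print(f"{authors} ({year}). {work['title'][0]}. "
      f"{work['container-title'][0]}, {work['volume']}({work['issue']}), "
      f"pp. {work['page']}.")

# Each deposited reference carries an `unstructured` citation string and/or a DOI.
for ref in work.get("reference", []):
    print(" -", ref.get("unstructured") or ref.get("DOI", ""))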