{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T15:10:01Z","timestamp":1730560201128,"version":"3.28.0"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031483110"},{"type":"electronic","value":"9783031483127"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-48312-7_9","type":"book-chapter","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T20:03:21Z","timestamp":1700597001000},"page":"114-126","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Study of\u00a0Various End-to-End Keyword Spotting Systems on\u00a0the\u00a0Bengali Language Under Low-Resource Condition"],"prefix":"10.1007","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-9870-3980","authenticated-orcid":false,"given":"Achintya Kr.","family":"Sarkar","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-6249-9463","authenticated-orcid":false,"given":"Tulika","family":"Basu","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-6520-320X","authenticated-orcid":false,"given":"Rajib","family":"Roy","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8278-3402","authenticated-orcid":false,"given":"Joyanta","family":"Basu","sequence":"additional","affiliation":[]},{"given":"Michael","family":"Tongbram","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1350-7199","authenticated-orcid":false,"given":"Yamben Jina","family":"Chanu","sequence":"additional","affiliation":[]},{"given":"Priyanka","family":"Dwivedi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,22]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Abdoli, S., Cardinal, P., Koerich, A.L.: End-to-end environmental sound classification using a 1D convolutional neural network. arXiv:1904.08990 (2019)","DOI":"10.1016\/j.eswa.2019.06.040"},{"key":"9_CR2","unstructured":"Basu, J., Bepari, M.S., Roy, R., Khan, S.: Design of telephonic speech data collection and transcription methodology for speech recognition systems. In: Proceedings of FRSM, pp. 147\u2013153. India (2012)"},{"key":"9_CR3","doi-asserted-by":"publisher","unstructured":"Basu, J., Bepari, M.S., Roy, R., Khan, S.: Real time challenges to handle the telephonic speech recognition system. In: S, M., Kumar, S. (eds.) Proceedings of the Fourth International Conference on Signal and Image Processing (ICSIP), vol. 222, pp. 395\u2013408. Springer, India (2013). https:\/\/doi.org\/10.1007\/978-81-322-1000-9_38","DOI":"10.1007\/978-81-322-1000-9_38"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Berg, A., O\u2019Connor, M., Cruz, M.T.: Keyword transformer: a self-attention model for keyword spotting. In: Proceedings of Interspeech, pp. 4249\u20134253 (2021)","DOI":"10.21437\/Interspeech.2021-1286"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Brian, M., et al.: librosa: audio and music signal analysis in python. In: Proceedings of the 14th Python in Science Conference, pp. 18\u201325 (2015)","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"D. Bruyne, L., Singh, P., D. Clercq, O., Lefever, E., Hoste, V.: How language-dependent is emotion detection? Evidence from multilingual BERT. In: Proceedings of the 2nd Workshop on Multi-lingual Representation Learning (MRL), pp. 76\u201385. Association for Computational Linguistics (2022)","DOI":"10.18653\/v1\/2022.mrl-1.7"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Du, X., Zhu, M., Chai, M., Shi, X.: End to end model for keyword spotting with trainable window function and Densenet. In: Proceedings of IEEE International Conference on Digital Signal Processing, pp. 1\u20135 (2018)","DOI":"10.1109\/ICDSP.2018.8631574"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: HuBERT: self-supervised speech representation learning by masked prediction of hidden units. arXiv:2106.07447 (2021)","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Pastor, M., Ribas, D., Ortega, A., Miguel, A., Lleida, E.: Cross-corpus speech emotion recognition with HuBERT self-supervised representation. In: Proceedings of Interspeech, pp. 76\u201380 (2022)","DOI":"10.21437\/IberSPEECH.2022-16"},{"key":"9_CR10","unstructured":"Povey, D., et al.: The kaldi speech recognition toolkit. In: Proceedings of IEEE Workshop on Automatic Speech Recognition and Understanding (2011)"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Rybakov, O., Kononenko, N., Subrahmanya, N., Visontai, M., Laurenzo, S.: Streaming keyword spotting on mobile devices. In: Proceedings of Interspeech, pp. 2277\u20132281 (2020)","DOI":"10.21437\/Interspeech.2020-1003"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Shan, C., Zhang, J., Wang, Y., Xie, L.: Attention-based end-to-end models for small-footprint keyword spotting. arXiv:1803.10916 (2018)","DOI":"10.21437\/Interspeech.2018-1777"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 (2015)","DOI":"10.1109\/ICCV.2015.314"},{"key":"9_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2019.06.005","volume":"59","author":"ZH Tan","year":"2020","unstructured":"Tan, Z.H., Sarkar, A.K., Dehak, N.: rVAD: an unsupervised segment-based robust voice activity detection method. Comput. Speech Lang. 59, 1\u201321 (2020)","journal-title":"Comput. Speech Lang."},{"issue":"6","key":"9_CR15","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1177\/0165551520985507","volume":"48","author":"A U\u00e7an","year":"2022","unstructured":"U\u00e7an, A., D\u00f6rterler, M., Ak\u00e7ap\u0131nar Sezer, E.: A study of Turkish emotion classification with pretrained language models. J. Inf. Sci. 48(6), 857\u2013865 (2022)","journal-title":"J. Inf. Sci."},{"key":"9_CR16","unstructured":"Warden, P.: Speech commands: a dataset for limited-vocabulary speech recognition. arXiv:1804.03209 (2018)"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Yang, J.: Ensemble deep learning with HuBERT for speech emotion recognition. In: Proceedings of IEEE 17th International Conference on Semantic Computing (ICSC), pp. 153\u2013154 (2023)","DOI":"10.1109\/ICSC56153.2023.00032"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-48312-7_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T14:44:51Z","timestamp":1730558691000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-48312-7_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031483110","9783031483127"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-48312-7_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dharwad","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iitdh.ac.in\/specom-2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"174","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"94","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}