{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T10:59:46Z","timestamp":1726225186196},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031390586"},{"type":"electronic","value":"9783031390593"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-39059-3_14","type":"book-chapter","created":{"date-parts":[[2023,7,30]],"date-time":"2023-07-30T13:01:37Z","timestamp":1690722097000},"page":"209-222","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Phoneme-Based Multi-task Assessment of\u00a0Affective Vocal Bursts"],"prefix":"10.1007","author":[{"given":"Tobias","family":"Hallmen","sequence":"first","affiliation":[]},{"given":"Silvan","family":"Mertes","sequence":"additional","affiliation":[]},{"given":"Dominik","family":"Schiller","sequence":"additional","affiliation":[]},{"given":"Florian","family":"Lingenfelser","sequence":"additional","affiliation":[]},{"given":"Elisabeth","family":"Andr\u00e9","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,31]]},"reference":[{"key":"14_CR1","unstructured":"Anuchitanukul, A., Specia, L.: Burst2vec: an adversarial multi-task approach for predicting emotion, age, and origin from vocal bursts. arXiv preprint arXiv:2206.12469 (2022)"},{"key":"14_CR2","unstructured":"Atmaja, B.T., Sasou, A.: Predicting affective vocal bursts with finetuned wav2vec 2.0. arXiv preprint arXiv:2209.13146 (2022)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Atmaja, B.T., Sasou, A., et al.: Jointly predicting emotion, age, and country using pre-trained acoustic embedding. arXiv preprint arXiv:2207.10333 (2022)","DOI":"10.1109\/ACIIW57231.2022.10085991"},{"key":"14_CR4","unstructured":"Baevski, A., Hsu, W.N., Xu, Q., Babu, A., Gu, J., Auli, M.: Data2vec: a general framework for self-supervised learning in speech, vision and language. arXiv preprint arXiv:2202.03555 (2022)"},{"key":"14_CR5","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems vol. 33, pp. 12449\u201312460 (2020)"},{"key":"14_CR6","doi-asserted-by":"publisher","unstructured":"Baird, A., Tzirakis, P., Batliner, A., Schuller, B., Keltner, D., Cowen, A.: The ACII 2022 affective vocal bursts workshop and competition: Understanding a critically understudied modality of emotional expression. arXiv preprint arXiv:2207.03572v1 (2022). https:\/\/doi.org\/10.48550\/arXiv.2207.03572","DOI":"10.48550\/arXiv.2207.03572"},{"key":"14_CR7","doi-asserted-by":"publisher","unstructured":"Baird, A., et al.: The ICML 2022 expressive vocalizations workshop and competition: Recognizing, generating, and personalizing vocal bursts. arXiv preprint arXiv:2205.01780v3 (2022). https:\/\/doi.org\/10.48550\/ARXIV.2205.01780","DOI":"10.48550\/ARXIV.2205.01780"},{"issue":"1","key":"14_CR8","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1037\/emo0000100","volume":"16","author":"DT Cordaro","year":"2016","unstructured":"Cordaro, D.T., Keltner, D., Tshering, S., Wangchuk, D., Flynn, L.M.: The voice conveys emotion in ten globalized cultures and one remote village in Bhutan. Emotion 16(1), 117 (2016)","journal-title":"Emotion"},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Cowen, A., et al.: The Hume vocal burst competition dataset (H-VB) | raw data [exvo: updated 02.28.22] [data set]. Zenodo (2022). https:\/\/doi.org\/10.5281\/zenodo.6308780","DOI":"10.5281\/zenodo.6308780"},{"issue":"6","key":"14_CR10","doi-asserted-by":"publisher","first-page":"698","DOI":"10.1037\/amp0000399","volume":"74","author":"AS Cowen","year":"2019","unstructured":"Cowen, A.S., Elfenbein, H.A., Laukka, P., Keltner, D.: Mapping 24 emotions conveyed by brief human vocalization. Am. Psychol. 74(6), 698 (2019)","journal-title":"Am. Psychol."},{"key":"14_CR11","unstructured":"Hallmen, T., Mertes, S., Schiller, D., Andr\u00e9, E.: An efficient multitask learning architecture for affective vocal burst analysis (2022)"},{"key":"14_CR12","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"issue":"8","key":"14_CR13","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"14_CR14","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"WN Hsu","year":"2021","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: HuBERT: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Proc. 29, 3451\u20133460 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Proc."},{"key":"14_CR15","unstructured":"Karas, V., Triantafyllopoulos, A., Song, M., Schuller, B.W.: Self-supervised attention networks and uncertainty loss weighting for multi-task emotion recognition on vocal bursts. arXiv preprint arXiv:2209.07384 (2022)"},{"key":"14_CR16","unstructured":"Kendall, A., Gal, Y., Cipolla, R.: Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7482\u20137491 (2018)"},{"key":"14_CR17","unstructured":"Kwon, J., Kim, J., Park, H., Choi, I.K.: ASAM: adaptive sharpness-aware minimization for scale-invariant learning of deep neural networks. In: International Conference on Machine Learning, pp. 5905\u20135914. PMLR (2021)"},{"key":"14_CR18","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"14_CR19","unstructured":"Nguyen, D.K., Pant, S., Ho, N.H., Lee, G.S., Kim, S.H., Yang, H.J.: Fine-tuning wav2vec for vocal-burst emotion recognition. arXiv preprint arXiv:2210.00263 (2022)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: LibriSpeech: an ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"issue":"4","key":"14_CR21","first-page":"43","volume":"9","author":"D Phutela","year":"2015","unstructured":"Phutela, D.: The importance of non-verbal communication. IUP J. Soft Skills 9(4), 43 (2015)","journal-title":"IUP J. Soft Skills"},{"key":"14_CR22","unstructured":"Purohit, T., Mahmoud, I.B., Vlasenko, B., Doss, M.M.: Comparing supervised and self-supervised embedding for exvo multi-task learning track. arXiv preprint arXiv:2206.11968 (2022)"},{"issue":"3","key":"14_CR23","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1016\/S0892-1997(05)80231-0","volume":"9","author":"KR Scherer","year":"1995","unstructured":"Scherer, K.R.: Expression of emotion in voice and music. J. Voice 9(3), 235\u2013248 (1995)","journal-title":"J. Voice"},{"issue":"1\u20132","key":"14_CR24","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0167-6393(02)00078-X","volume":"40","author":"M Schr\u00f6der","year":"2003","unstructured":"Schr\u00f6der, M.: Experimental study of affect bursts. Speech Commun. 40(1\u20132), 99\u2013116 (2003)","journal-title":"Speech Commun."},{"key":"14_CR25","unstructured":"Sharma, R., Vuong, T., Lindsey, M., Dhamyal, H., Singh, R., Raj, B.: Self-supervision and learnable STRFs for age, emotion, and country prediction. arXiv preprint arXiv:2206.12568 (2022)"},{"key":"14_CR26","unstructured":"Syed, M.S.S., Syed, Z.S., Syed, A.: Classification of vocal bursts for ACII 2022 A-VB-Type competition using convolutional network networks and deep acoustic embeddings. arXiv preprint arXiv:2209.14842 (2022)"},{"issue":"1","key":"14_CR27","doi-asserted-by":"publisher","first-page":"200","DOI":"10.3390\/s23010200","volume":"23","author":"DL Trinh","year":"2022","unstructured":"Trinh, D.L., Vo, M.C., Kim, S.H., Yang, H.J., Lee, G.S.: Self-relation attention and temporal awareness for emotion recognition via vocal burst. Sensors 23(1), 200 (2022)","journal-title":"Sensors"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Xu, Q., Baevski, A., Auli, M.: Simple and effective zero-shot cross-lingual phoneme recognition. arXiv preprint arXiv:2109.11680 (2021)","DOI":"10.21437\/Interspeech.2022-60"}],"container-title":["Communications in Computer and Information Science","Deep Learning Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-39059-3_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,30]],"date-time":"2023-07-30T13:04:30Z","timestamp":1690722270000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-39059-3_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031390586","9783031390593"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-39059-3_14","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"31 July 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DeLTA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Deep Learning Theory and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"delta2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/delta.scitevents.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"PRIMORIS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"42","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}