{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T15:49:26Z","timestamp":1742399366664},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,15]],"date-time":"2018-10-15T00:00:00Z","timestamp":1539561600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["No.61332017","No.61773379","NO.61425017","No.61603390","No.61771472"]},{"name":"National Key Research & Development Plan of China","award":["No. 2017YFB1002804"]},{"name":"Major Program for the National Social Science Fund of China","award":["13&ZD189"]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,15]]},"DOI":"10.1145\/3266302.3266304","type":"proceedings-article","created":{"date-parts":[[2018,10,18]],"date-time":"2018-10-18T10:19:29Z","timestamp":1539857969000},"page":"57-64","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["Multimodal Continuous Emotion Recognition with Data Augmentation Using Recurrent Neural Networks"],"prefix":"10.1145","author":[{"given":"Jian","family":"Huang","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Ya","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Jianhua","family":"Tao","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Zheng","family":"Lian","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Mingyue","family":"Niu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Minghao","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2018,10,15]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.4018\/jse.2010101605"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.06.016"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9280.2007.02024.x"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"B. Schuller M. Valstar F. Eyben etal 2011. Avec 2011--the first international audio\/visual emotion challenge. Affective Computing and Intelligent Interaction 415--424. B. Schuller M. Valstar F. Eyben et al. 2011. Avec 2011--the first international audio\/visual emotion challenge. Affective Computing and Intelligent Interaction 415--424.","DOI":"10.1007\/978-3-642-24571-8_53"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2388676.2388776"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2512530.2512533"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661806.2661807"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806408"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988258"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133944.3133953"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3266302.3266316"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/taffc.2015.2457417"},{"key":"e_1_3_2_1_13_1","volume-title":"Investigation of Bottle-Neck Features for Emotion Recognition. International Conference on Text, Speech, and Dialogue. Springer International Publishing, 426--434","author":"Popkov\u00e1 A.","year":"2016","unstructured":"A. Popkov\u00e1 , F. Povoln\u00fd , P. Matejka , 2016 . Investigation of Bottle-Neck Features for Emotion Recognition. International Conference on Text, Speech, and Dialogue. Springer International Publishing, 426--434 . A. Popkov\u00e1, F. Povoln\u00fd, P. Matejka, et al. 2016. Investigation of Bottle-Neck Features for Emotion Recognition. International Conference on Text, Speech, and Dialogue. Springer International Publishing, 426--434."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988268"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133944.3133949"},{"key":"e_1_3_2_1_16_1","volume-title":"Soundnet: Learning sound representations from unlabeled video. Advances in Neural Information Processing Systems, 892--900.","author":"Aytar Y.","year":"2016","unstructured":"Y. Aytar , C. Vondrick , A. Torralba . 2016 . Soundnet: Learning sound representations from unlabeled video. Advances in Neural Information Processing Systems, 892--900. Y. Aytar, C. Vondrick, A. Torralba. 2016. Soundnet: Learning sound representations from unlabeled video. Advances in Neural Information Processing Systems, 892--900."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2013.65"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988270"},{"key":"e_1_3_2_1_19_1","unstructured":"H. W. Chung C. L. Jen L. W. Wen. 2014. Survey on audiovisual emotion recognition: databases features and data fusion strategies. APSIPA transactions on signal and information processing. H. W. Chung C. L. Jen L. W. Wen. 2014. Survey on audiovisual emotion recognition: databases features and data fusion strategies. APSIPA transactions on signal and information processing."},{"key":"e_1_3_2_1_20_1","volume-title":"Thirteenth Annual Conference of the International Speech Communication Association.","author":"Viktor R.","year":"2012","unstructured":"R. Viktor , A. Sankaranarayanan , S. Shirin , 2012 . Emotion recognition using acoustic and lexical features . In Thirteenth Annual Conference of the International Speech Communication Association. R. Viktor, A. Sankaranarayanan, S. Shirin, et al. 2012. Emotion recognition using acoustic and lexical features. In Thirteenth Annual Conference of the International Speech Communication Association."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2808196.2811640"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666277"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967286"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661806.2661811"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.03.001"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133944.3133946"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2334294"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-711"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"G. Keren J. Deng J. Pohjalainen etal 2016. Convolutional Neural Networks with Data Augmentation for Classifying Speakers' Native Language. Interspeech 2393--2397. G. Keren J. Deng J. Pohjalainen et al. 2016. Convolutional Neural Networks with Data Augmentation for Classifying Speakers' Native Language. Interspeech 2393--2397.","DOI":"10.21437\/Interspeech.2016-261"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of Interspeech","author":"Chi C. L.","year":"2009","unstructured":"C. L. Chi , B. Carlos , L. Sungbok , N. Shrikanth . 2009 . Modeling Mutual Influence of Interlocutor Emotion States in Dyadic Spoken Interactions . In Proceedings of Interspeech 2009, Brighton, UK. C. L. Chi, B. Carlos, L. Sungbok, N. Shrikanth. 2009. Modeling Mutual Influence of Interlocutor Emotion States in Dyadic Spoken Interactions. In Proceedings of Interspeech 2009, Brighton, UK."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"R. Zhang A. Atsushi S. Kobashikawa etal 2017. Interaction and Transition Model for Speech Emotion Recognition in Dialogue. Interspeech 1094--1097. R. Zhang A. Atsushi S. Kobashikawa et al. 2017. Interaction and Transition Model for Speech Emotion Recognition in Dialogue. Interspeech 1094--1097.","DOI":"10.21437\/Interspeech.2017-713"},{"key":"e_1_3_2_1_33_1","unstructured":"B.Valentin C. Chlo\u00b4e E. Slim. 2018. Attitude classification in adjacency pairs of a human-agent interaction with hidden conditional random fields. ICASSP. B.Valentin C. Chlo\u00b4e E. Slim. 2018. Attitude classification in adjacency pairs of a human-agent interaction with hidden conditional random fields. ICASSP."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1163\/156770908X289242"},{"key":"e_1_3_2_1_35_1","volume-title":"Nebraska symposium on motivation","author":"Ekman P.","year":"1971","unstructured":"P. Ekman . 1971 . Universals and cultural differences in facial expressions of emotion . Nebraska symposium on motivation . University of Nebraska Press. P. Ekman. 1971. Universals and cultural differences in facial expressions of emotion. Nebraska symposium on motivation. University of Nebraska Press."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344561"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/NCC.2013.6487998"},{"key":"e_1_3_2_1_38_1","unstructured":"F. Weninger F. Ringeval E. Marchi etal 2016. Discriminatively Trained Recurrent Neural Networks for Continuous Dimensional Emotion Recognition from Audio. IJCAI 2196--2202. F. Weninger F. Ringeval E. Marchi et al. 2016. Discriminatively Trained Recurrent Neural Networks for Continuous Dimensional Emotion Recognition from Audio. IJCAI 2196--2202."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_40_1","volume-title":"The INTERSPEECH 2010 paralinguistic challenge. Eleventh Annual Conference of the International Speech Communication Association.","author":"Schuller B.","year":"2010","unstructured":"B. Schuller , S. Steidl , A. Batliner , 2010 . The INTERSPEECH 2010 paralinguistic challenge. Eleventh Annual Conference of the International Speech Communication Association. B. Schuller, S. Steidl, A. Batliner, et al. 2010. The INTERSPEECH 2010 paralinguistic challenge. Eleventh Annual Conference of the International Speech Communication Association."},{"key":"e_1_3_2_1_41_1","volume-title":"OpenFace: An Open Source Facial Behavior Analysis Toolkit. In: Proc. IEEE Winter Conference on Applications of Computer Vision","author":"Tadas B.","year":"2016","unstructured":"B. Tadas , R. Peter , P. M. Louis . 2016 . OpenFace: An Open Source Facial Behavior Analysis Toolkit. In: Proc. IEEE Winter Conference on Applications of Computer Vision , New York, USA. B. Tadas, R. Peter, P. M. Louis. 2016. OpenFace: An Open Source Facial Behavior Analysis Toolkit. In: Proc. IEEE Winter Conference on Applications of Computer Vision, New York, USA."},{"key":"e_1_3_2_1_42_1","unstructured":"M. Tomas S. Ilya C. Kai etal 2013. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems pages 3111--3119. M. Tomas S. Ilya C. Kai et al. 2013. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems pages 3111--3119."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2808196.2811634"},{"key":"e_1_3_2_1_45_1","unstructured":"M. D. Zeiler. 2012. ADADELTA: an adaptive learning rate method. arXiv preprint arXiv:1212.5701 M. D. Zeiler. 2012. ADADELTA: an adaptive learning rate method. arXiv preprint arXiv:1212.5701"}],"event":{"name":"MM '18: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Seoul Republic of Korea","acronym":"MM '18"},"container-title":["Proceedings of the 2018 on Audio\/Visual Emotion Challenge and Workshop"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3266302.3266304","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T05:16:58Z","timestamp":1673327818000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3266302.3266304"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,15]]},"references-count":45,"alternative-id":["10.1145\/3266302.3266304","10.1145\/3266302"],"URL":"https:\/\/doi.org\/10.1145\/3266302.3266304","relation":{},"subject":[],"published":{"date-parts":[[2018,10,15]]},"assertion":[{"value":"2018-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}