{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T21:24:16Z","timestamp":1743888256937,"version":"3.28.0"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1109\/cvprw56347.2022.00278","type":"proceedings-article","created":{"date-parts":[[2022,8,23]],"date-time":"2022-08-23T19:52:53Z","timestamp":1661284373000},"page":"2485-2494","source":"Crossref","is-referenced-by-count":50,"title":["A Joint Cross-Attention Model for Audio-Visual Fusion in Dimensional Emotion Recognition"],"prefix":"10.1109","author":[{"given":"R Gnana","family":"Praveen","sequence":"first","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Wheidima Carneiro","family":"de Melo","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Nasib","family":"Ullah","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Haseeb","family":"Aslam","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Osama","family":"Zeeshan","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Theo","family":"Denorme","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Marco","family":"Pedersoli","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Alessandro L.","family":"Koerich","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Simon","family":"Bacon","sequence":"additional","affiliation":[{"name":"Concordia University,Kinesiology & Applied Physiology,Dept. of Health,Montreal,Canada"}]},{"given":"Patrick","family":"Cardinal","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]},{"given":"Eric","family":"Granger","sequence":"additional","affiliation":[{"name":"Livia,École de Technologie Supérieure,Montreal,Canada"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref33","article-title":"Cross attention network for few-shot classification","author":"hou","year":"2019","journal-title":"NIPS"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00397"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00098"},{"key":"ref30","article-title":"Cross-attentional audio-visual fusion for weakly-supervised action localization","author":"lee","year":"2021","journal-title":"ICLRE"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.03.001"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.9"},{"article-title":"Weakly supervised learning for facial behavior analysis : A review","year":"2021","author":"r","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00139"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00056"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00396"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00406"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1080\/02699939208411068"},{"journal-title":"Emotion Recognition and Its Applications","year":"2014","author":"ko?akowska","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00402"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3264869.3264873"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2020.10.011"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/FG52635.2021.9667055"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383573"},{"key":"ref26","article-title":"Deep auto-encoders with sequential learning for multimodal dimensional emotion recognition","author":"nguyen","year":"2021","journal-title":"IEEE Trans on Multimedia"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SMC.2019.8914655"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00266"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/TAFFC.2017.2740923","article-title":"Affectnet: A database for facial expression, valence, and arousal computing in the wild","volume":"10","author":"mollahosseini","year":"2019","journal-title":"IEEE Transactions on Affective Computing"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00126"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00408"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2808196.2811641"},{"article-title":"Distribution matching for heterogeneous multitask learning: a large-scale face study","year":"2021","author":"kollias","key":"ref12"},{"article-title":"Affect analysis in-the-wild: Valence-arousal, expressions, action units and a unified framework","year":"2021","author":"kollias","key":"ref13"},{"article-title":"Expression, affect, action unit recognition: Aff-wild2, multi-task learning and arcface","year":"2019","author":"kollias","key":"ref14"},{"article-title":"Face behavior a la carte: Expressions, affect and action units in a single network","year":"2019","author":"kollias","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00259"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.4324\/9781351308724"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2764438"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2021.03.007"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1037\/h0054570"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992972"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2944808"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2013.6553805"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01158-4"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.20"},{"article-title":"Frame-level prediction of facial expressions, valence, arousal and action units for mobile devices","year":"2022","author":"savchenko","key":"ref49"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.248"},{"article-title":"Multi-modal emotion estimation for in-the-wild videos","year":"2022","author":"meng","key":"ref46"},{"key":"ref45","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"ICAISC"},{"article-title":"An ensemble approach for facial expression analysis in video","year":"2022","author":"nguyen","key":"ref48"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00265"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2228"},{"key":"ref44","article-title":"Attention bottlenecks for multimodal fusion","author":"nagrani","year":"2021","journal-title":"NIPS"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01271"}],"event":{"name":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","start":{"date-parts":[[2022,6,19]]},"location":"New Orleans, LA, USA","end":{"date-parts":[[2022,6,20]]}},"container-title":["2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9856930\/9856648\/09856650.pdf?arnumber=9856650","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,12]],"date-time":"2022-09-12T20:05:05Z","timestamp":1663013105000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9856650\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/cvprw56347.2022.00278","relation":{},"subject":[],"published":{"date-parts":[[2022,6]]}}}