{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,13]],"date-time":"2024-01-13T23:30:29Z","timestamp":1705188629913},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"27-28","license":[{"start":{"date-parts":[[2020,4,8]],"date-time":"2020-04-08T00:00:00Z","timestamp":1586304000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,8]],"date-time":"2020-04-08T00:00:00Z","timestamp":1586304000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1007\/s11042-020-08845-2","type":"journal-article","created":{"date-parts":[[2020,4,8]],"date-time":"2020-04-08T02:02:31Z","timestamp":1586311351000},"page":"20019-20038","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Spatial-temporal dual-actor CNN for human interaction prediction in video"],"prefix":"10.1007","volume":"79","author":[{"given":"Mahlagha","family":"Afrasiabi","sequence":"first","affiliation":[]},{"given":"Hassan","family":"Khotanlou","sequence":"additional","affiliation":[]},{"given":"Theo","family":"Gevers","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,8]]},"reference":[{"key":"8845_CR1","doi-asserted-by":"crossref","unstructured":"Afrasiabi M, Mansoorizadeh M, et al. (2019) Dtw-cnn: time series-based human interaction prediction in videos using cnn-extracted features. Vis Comput, 1\u201313","DOI":"10.1007\/s00371-019-01722-6"},{"issue":"3","key":"8845_CR2","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1145\/1922649.1922653","volume":"43","author":"JK Aggarwal","year":"2011","unstructured":"Aggarwal JK, Ryoo MS (2011) Human activity analysis: a review. ACM Comput Surv (CSUR) 43(3):16","journal-title":"ACM Comput Surv (CSUR)"},{"key":"8845_CR3","doi-asserted-by":"crossref","unstructured":"Ahmadipour Z, Afrasiabi M, Khotanlou H (2016) Multiple human detection in images based on differential evolution and hog-lbp. In: 2016 Eighth international conference on information and knowledge technology (IKT). IEEE, pp 61\u201365","DOI":"10.1109\/IKT.2016.7777779"},{"key":"8845_CR4","unstructured":"Berlin SJ, John M (2016) Human interaction recognition through deep learning network. In: 2016 IEEE International carnahan conference on security technology (ICCST). IEEE, pp 1\u20134"},{"issue":"7","key":"8845_CR5","doi-asserted-by":"publisher","first-page":"2631","DOI":"10.1109\/TCYB.2018.2831447","volume":"49","author":"Y Bin","year":"2018","unstructured":"Bin Y, Yang Y, Shen F, Xie N, Shen HT, Li X (2018) Describing video with attention-based bidirectional lstm. IEEE Trans Cybern 49(7):2631\u20132641","journal-title":"IEEE Trans Cybern"},{"key":"8845_CR6","first-page":"25","volume":"2004","author":"T Brox","year":"2004","unstructured":"Brox T, Bruhn A, Papenberg N, Weickert J (2004) High accuracy optical flow estimation based on a theory for warping. Comput Vis-ECCV 2004:25\u201336","journal-title":"Comput Vis-ECCV"},{"key":"8845_CR7","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: IEEE Computer society conference on computer vision and pattern recognition, 2005. CVPR 2005, vol 1. IEEE, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"key":"8845_CR8","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B, Schmid C (2006) Human detection using oriented histograms of flow and appearance. In: European conference on computer vision. Springer, pp 428\u2013441","DOI":"10.1007\/11744047_33"},{"key":"8845_CR9","doi-asserted-by":"crossref","unstructured":"Donahue J, Anne Hendricks L, Guadarrama S, Rohrbach M, Venugopalan S, Saenko K, Darrell T (2015) Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2625\u20132634","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"8845_CR10","doi-asserted-by":"crossref","unstructured":"Dyer C, Ballesteros M, Ling W, Matthews A, Smith NA (2015) Transition-based dependency parsing with stack long short-term memory. arXiv:1505.08075","DOI":"10.3115\/v1\/P15-1033"},{"key":"8845_CR11","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.imavis.2017.01.010","volume":"60","author":"S Herath","year":"2017","unstructured":"Herath S, Harandi M, Porikli F (2017) Going deeper into action recognition: a survey. Image Vis Comput 60:4\u201321","journal-title":"Image Vis Comput"},{"key":"8845_CR12","doi-asserted-by":"crossref","unstructured":"Hoai M, Zisserman A (2014) Talking heads: detecting humans and recognizing their interactions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 875\u2013882","DOI":"10.1109\/CVPR.2014.117"},{"issue":"8","key":"8845_CR13","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neur Comput 9(8):1735\u20131780","journal-title":"Neur Comput"},{"issue":"6","key":"8845_CR14","doi-asserted-by":"publisher","first-page":"567","DOI":"10.3390\/app7060567","volume":"7","author":"X Ji","year":"2017","unstructured":"Ji X, Wang C, Ju Z (2017) A new framework of human interaction recognition based on multiple stage probability fusion. Appl Sci 7(6):567","journal-title":"Appl Sci"},{"key":"8845_CR15","doi-asserted-by":"crossref","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: convolutional architecture for fast feature embedding. In: Proceedings of the 22nd ACM international conference on multimedia. ACM, pp 675\u2013678","DOI":"10.1145\/2647868.2654889"},{"key":"8845_CR16","doi-asserted-by":"crossref","unstructured":"Ke Q, Bennamoun M, An S, Boussaid F, Sohel F (2016) Human interaction prediction using deep temporal features. In: European conference on computer vision. Springer, pp 403\u2013414","DOI":"10.1007\/978-3-319-48881-3_28"},{"key":"8845_CR17","doi-asserted-by":"crossref","unstructured":"Ke Q, Bennamoun M, An S, Sohel F, Boussaid F (2017) Leveraging structural context models and ranking score fusion for human interaction prediction. IEEE Transactions on Multimedia","DOI":"10.1109\/TMM.2017.2778559"},{"key":"8845_CR18","doi-asserted-by":"crossref","unstructured":"Ko T (2008) A survey on behavior analysis in video surveillance for homeland security applications. In: Applied imagery pattern recognition workshop, 2008. AIPR\u201908. 37th IEEE. IEEE, pp 1\u20138","DOI":"10.1109\/AIPR.2008.4906450"},{"issue":"9","key":"8845_CR19","doi-asserted-by":"publisher","first-page":"1844","DOI":"10.1109\/TPAMI.2015.2491928","volume":"38","author":"Y Kong","year":"2016","unstructured":"Kong Y, Fu Y (2016) Max-margin action prediction machine. IEEE Trans Pattern Anal Mach Intell 38(9):1844\u20131858","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"8845_CR20","doi-asserted-by":"crossref","unstructured":"Kong Y, Jia Y, Fu Y (2012) Learning human interaction by interactive phrases. In: European conference on computer vision. Springer, pp 300\u2013313","DOI":"10.1007\/978-3-642-33718-5_22"},{"key":"8845_CR21","doi-asserted-by":"crossref","unstructured":"Kong Y, Kit D, Fu Y (2014) A discriminative model with multiple temporal scales for action prediction. In: European conference on computer vision. Springer, pp 596\u2013611","DOI":"10.1007\/978-3-319-10602-1_39"},{"key":"8845_CR22","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"8845_CR23","unstructured":"Kumar B, Carneiro G, Reid I, et al. (2016) Learning local image descriptors with deep siamese and triplet convolutional networks by minimising global loss functions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5385\u20135394"},{"key":"8845_CR24","doi-asserted-by":"crossref","unstructured":"Lan T, Chen TC, Savarese S (2014) A hierarchical representation for future action prediction. In: ECCV (3), pp 689\u2013704","DOI":"10.1007\/978-3-319-10578-9_45"},{"key":"8845_CR25","doi-asserted-by":"crossref","unstructured":"Laptev I, Marszalek M, Schmid C, Rozenfeld B (2008) Learning realistic human actions from movies. In: IEEE Conference on computer vision and pattern recognition, 2008. CVPR 2008. IEEE, pp 1\u20138","DOI":"10.1109\/CVPR.2008.4587756"},{"issue":"2","key":"8845_CR26","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"key":"8845_CR27","doi-asserted-by":"crossref","unstructured":"Medsker L, Jain LC (1999) Recurrent neural networks: design and applications. CRC press","DOI":"10.1201\/9781420049176"},{"key":"8845_CR28","doi-asserted-by":"crossref","unstructured":"Patron-Perez A, Marszalek M, Zisserman A, Reid ID (2010) High five: recognising human interactions in tv shows. In: BMVC, vol 1. Citeseer, p 2","DOI":"10.5244\/C.24.50"},{"issue":"6","key":"8845_CR29","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R Poppe","year":"2010","unstructured":"Poppe R (2010) A survey on vision-based human action recognition. Image Vis Comput 28(6):976\u2013990","journal-title":"Image Vis Comput"},{"key":"8845_CR30","doi-asserted-by":"crossref","unstructured":"Qi Y, Song Y, Zhang H, Liu J (2016) Sketch-based image retrieval via siamese convolutional neural network. In: 2016 IEEE International conference on image processing (ICIP). IEEE, pp 2460\u20132464","DOI":"10.1109\/ICIP.2016.7532801"},{"issue":"5","key":"8845_CR31","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1109\/THMS.2014.2325871","volume":"44","author":"M Ramanathan","year":"2014","unstructured":"Ramanathan M, Yau WY, Teoh EK (2014) Human action recognition with video data: research and evaluation challenges. IEEE Trans Human-Mach Syst 44 (5):650\u2013663","journal-title":"IEEE Trans Human-Mach Syst"},{"key":"8845_CR32","doi-asserted-by":"crossref","unstructured":"Ryoo MS, Aggarwal JK (2010) UT-Interaction dataset, ICPR contest on semantic description of human activities (SDHA). http:\/\/cvrc.ece.utexas.edu\/SDHA2010\/Human_Interaction.html","DOI":"10.1007\/978-3-642-17711-8_28"},{"issue":"8","key":"8845_CR33","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"J Schmidhuber","year":"1997","unstructured":"Schmidhuber J, Hochreiter S (1997) Long short-term memory. Neur Comput 9(8):1735\u20131780","journal-title":"Neur Comput"},{"key":"8845_CR34","doi-asserted-by":"crossref","unstructured":"Shu X, Tang J, Qi GJ, Song Y, Li Z, Zhang L (2017) Concurrence-aware long short-term sub-memories for person-person action recognition. arXiv:1706.00931","DOI":"10.1109\/CVPRW.2017.270"},{"key":"8845_CR35","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. In: Advances in neural information processing systems, pp 568\u2013576"},{"key":"8845_CR36","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"key":"8845_CR37","doi-asserted-by":"crossref","unstructured":"Tao R, Gavves E, Smeulders AW (2016) Siamese instance search for tracking. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1420\u20131429","DOI":"10.1109\/CVPR.2016.158"},{"key":"8845_CR38","doi-asserted-by":"crossref","unstructured":"Trong NP, Nguyen H, Kazunori K, Le Hoai B (2017) A comprehensive survey on human activity prediction. In: International conference on computational science and its applications. Springer, pp 411\u2013425","DOI":"10.1007\/978-3-319-62392-4_30"},{"key":"8845_CR39","doi-asserted-by":"crossref","unstructured":"Vahdat A, Gao B, Ranjbar M, Mori G (2011) A discriminative key pose sequence model for recognizing human interactions. In: 2011 IEEE International conference on computer vision workshops (ICCV workshops). IEEE, pp 1729\u20131736","DOI":"10.1109\/ICCVW.2011.6130458"},{"key":"8845_CR40","doi-asserted-by":"crossref","unstructured":"Wang X, Farhadi A, Gupta A (2016) Actions\u02dc transformations. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2658\u20132667","DOI":"10.1109\/CVPR.2016.291"},{"issue":"12","key":"8845_CR41","doi-asserted-by":"publisher","first-page":"2531","DOI":"10.1109\/TPAMI.2015.2417578","volume":"37","author":"C Xu","year":"2015","unstructured":"Xu C, Tao D, Xu C (2015) Multi-view intact space learning. IEEE Trans Pattern Anal Mach Intell 37(12):2531\u20132544","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"11","key":"8845_CR42","doi-asserted-by":"publisher","first-page":"5600","DOI":"10.1109\/TIP.2018.2855422","volume":"27","author":"Y Yang","year":"2018","unstructured":"Yang Y, Zhou J, Ai J, Bin Y, Hanjalic A, Shen HT, Ji Y (2018) Video captioning by adversarial lstm. IEEE Trans Image Process 27(11):5600\u20135611","journal-title":"IEEE Trans Image Process"},{"key":"8845_CR43","doi-asserted-by":"crossref","unstructured":"Yao L, Torabi A, Cho K, Ballas N, Pal C, Larochelle H, Courville A (2015) Describing videos by exploiting temporal structure. In: Proceedings of the IEEE international conference on computer vision, pp 4507\u20134515","DOI":"10.1109\/ICCV.2015.512"},{"key":"8845_CR44","doi-asserted-by":"crossref","unstructured":"Yu C, Zhao X, Zheng Q, Zhang P, You X (2018) Hierarchical bilinear pooling for fine-grained visual recognition. In: Proceedings of the European conference on computer vision (ECCV), pp 574\u2013589","DOI":"10.1007\/978-3-030-01270-0_35"},{"key":"8845_CR45","doi-asserted-by":"crossref","unstructured":"Zhang S, Wei Z, Nie J, Huang L, Wang S, Li Z (2017) A review on human activity recognition using vision-based method. J Healthcare Eng, 2017","DOI":"10.1155\/2017\/3090343"},{"key":"8845_CR46","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.media.2019.02.010","volume":"54","author":"J Zhang","year":"2019","unstructured":"Zhang J, Xie Y, Wu Q, Xia Y (2019) Medical image classification using synergic deep learning. Med Image Anal 54:10\u201319","journal-title":"Med Image Anal"},{"key":"8845_CR47","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1016\/j.patcog.2018.01.012","volume":"78","author":"Y Zhao","year":"2018","unstructured":"Zhao Y, You X, Yu S, Xu C, Yuan W, Jing XY, Zhang T, Tao D (2018) Multi-view manifold learning with locality alignment. Pattern Recogn 78:154\u2013166","journal-title":"Pattern Recogn"},{"key":"8845_CR48","doi-asserted-by":"crossref","unstructured":"Zhu L, Xu Z, Yang Y (2017) Bidirectional multirate reconstruction for temporal modeling in videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2653\u20132662","DOI":"10.1109\/CVPR.2017.147"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08845-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-020-08845-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08845-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,7]],"date-time":"2021-04-07T23:46:20Z","timestamp":1617839180000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-020-08845-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,8]]},"references-count":48,"journal-issue":{"issue":"27-28","published-print":{"date-parts":[[2020,7]]}},"alternative-id":["8845"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-08845-2","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,8]]},"assertion":[{"value":"5 April 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 January 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 April 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}