{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T06:04:28Z","timestamp":1740117868029,"version":"3.37.3"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2019,3,1]],"date-time":"2019-03-01T00:00:00Z","timestamp":1551398400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["61672089","61273274","61572064"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.1016\/j.neucom.2019.01.008","type":"journal-article","created":{"date-parts":[[2019,1,9]],"date-time":"2019-01-09T12:49:07Z","timestamp":1547038147000},"page":"351-363","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":23,"special_numbering":"C","title":["Action recognition and localization with spatial and temporal contexts"],"prefix":"10.1016","volume":"333","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2206-5051","authenticated-orcid":false,"given":"Wanru","family":"Xu","sequence":"first","affiliation":[]},{"given":"Zhenjiang","family":"Miao","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Ji","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.neucom.2019.01.008_bib0001","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1109\/TMM.2011.2128301","article-title":"Fast action detection via discriminative random forest voting and top-k subvolume search","volume":"13","author":"Yu","year":"2011","journal-title":"IEEE 
Trans. Multimed."},{"issue":"4","key":"10.1016\/j.neucom.2019.01.008_bib0002","doi-asserted-by":"crossref","first-page":"512","DOI":"10.1109\/TMM.2015.2404779","article-title":"Learning spatial and temporal extents of human actions for action detection","volume":"17","author":"Zhou","year":"2015","journal-title":"IEEE Trans. Multimed."},{"issue":"7","key":"10.1016\/j.neucom.2019.01.008_bib0003","doi-asserted-by":"crossref","first-page":"1494","DOI":"10.1109\/TMM.2017.2674622","article-title":"A hierarchical spatio-temporal model for human activity recognition","volume":"19","author":"Xu","year":"2017","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2019.01.008_bib0004","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3551","article-title":"Action recognition with improved trajectories","author":"Wang","year":"2013"},{"key":"10.1016\/j.neucom.2019.01.008_bib0005","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4305","article-title":"Action recognition with trajectory-pooled deep-convolutional descriptors","author":"Wang","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0006","series-title":"IEEE Conf. 
on Computer Vision and Pattern Recognition","article-title":"Untrimmednets for weakly supervised action recognition and detection","author":"Wang","year":"2017"},{"key":"10.1016\/j.neucom.2019.01.008_bib0007","article-title":"Deep learning for detecting multiple space-time action tubes in videos","author":"Saha","year":"2015","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2019.01.008_bib0008","series-title":"Proceedings of European Conference on Computer Vision","first-page":"565","article-title":"Video action detection with relational dynamic-poselets","author":"Wang","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0009","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3164","article-title":"Learning to track for spatio-temporal action localization","author":"Weinzaepfel","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0010","series-title":"Proceedings of European Conference on Computer Vision","first-page":"744","article-title":"Multi-region two-stream r-cnn for action detection","author":"Peng","year":"2016"},{"issue":"9","key":"10.1016\/j.neucom.2019.01.008_bib0011","doi-asserted-by":"crossref","first-page":"1728","DOI":"10.1109\/TPAMI.2011.38","article-title":"Discriminative video pattern search for efficient action detection","volume":"33","author":"Yuan","year":"2011","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2019.01.008_bib0012","series-title":"Proceedings of British Machine Vision Conference, BMVC","first-page":"1","article-title":"Action detection in crowd.","author":"Siva","year":"2010"},{"issue":"11","key":"10.1016\/j.neucom.2019.01.008_bib0013","doi-asserted-by":"crossref","first-page":"2782","DOI":"10.1109\/TPAMI.2013.65","article-title":"Temporal localization of actions with actoms","volume":"35","author":"Gaidon","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. 
Intell."},{"key":"10.1016\/j.neucom.2019.01.008_bib0014","unstructured":"L. Wang, Y. Qiao, X. Tang, Action Recognition and Detection by Combining Motion and Appearance Features, THUMOS14 Action Recognition Challenge 1 (2014) 2."},{"key":"10.1016\/j.neucom.2019.01.008_bib0015","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2545","article-title":"Efficient action localization with approximately normalized fisher vectors","author":"Oneata","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0016","series-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"2442","article-title":"Discriminative subvolume search for efficient action detection","author":"Yuan","year":"2009"},{"key":"10.1016\/j.neucom.2019.01.008_bib0017","series-title":"Proceedings of Advances in Neural Information Processing Systems","first-page":"350","article-title":"Max-margin structured output regression for spatio-temporal action localization","author":"Tran","year":"2012"},{"key":"10.1016\/j.neucom.2019.01.008_bib0018","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1302","article-title":"Fast action proposals for human action detection and search","author":"Yu","year":"2015"},{"issue":"2","key":"10.1016\/j.neucom.2019.01.008_bib0019","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1109\/TCSVT.2013.2276856","article-title":"Detecting human action as the spatio-temporal tube of maximum mutual information","volume":"24","author":"Wang","year":"2014","journal-title":"IEEE Trans. Circuits Syst. 
Video Technol."},{"key":"10.1016\/j.neucom.2019.01.008_bib0020","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3093","article-title":"Temporal action localization with pyramid of score distribution features","author":"Yuan","year":"2016"},{"key":"10.1016\/j.neucom.2019.01.008_bib0021","series-title":"Proceedings of the ACM International Conference on Image and Video Retrieval","first-page":"454","article-title":"A set of co-occurrence matrices on the intrinsic manifold of human silhouettes for action recognition","author":"Zheng","year":"2010"},{"issue":"12","key":"10.1016\/j.neucom.2019.01.008_bib0022","doi-asserted-by":"crossref","first-page":"3819","DOI":"10.1016\/j.patcog.2014.07.006","article-title":"Realistic action recognition via sparsely-constructed gaussian processes","volume":"47","author":"Liu","year":"2014","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2019.01.008_bib0023","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"740","article-title":"Action localization with tubelets from motion","author":"Jain","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0024","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3762","article-title":"Human action segmentation with hierarchical supervoxel consistency","author":"Lu","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0025","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2744","article-title":"Action recognition and localization by hierarchical space-time segments","author":"Ma","year":"2013"},{"key":"10.1016\/j.neucom.2019.01.008_bib0026","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2642","article-title":"Spatiotemporal deformable part models for action 
detection","author":"Tian","year":"2013"},{"key":"10.1016\/j.neucom.2019.01.008_bib0027","series-title":"Proceedings of the 2011 IEEE International Conference on Computer Vision, ICCV","first-page":"2003","article-title":"Discriminative figure-centric models for joint action localization and recognition","author":"Lan","year":"2011"},{"key":"10.1016\/j.neucom.2019.01.008_bib0028","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"759","article-title":"Finding action tubes","author":"Gkioxari","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0029","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1049","article-title":"Temporal action localization in untrimmed videos via multi-stage cnns","author":"Shou","year":"2016"},{"key":"10.1016\/j.neucom.2019.01.008_bib0030","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1440","article-title":"Fast r-cnn","author":"Girshick","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0031","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2678","article-title":"End-to-end learning of action detection from frame glimpses in videos","author":"Yeung","year":"2016"},{"key":"10.1016\/j.neucom.2019.01.008_bib0032","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1080","article-title":"Contextual action recognition with r* cnn","author":"Gkioxari","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0033","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"4543","article-title":"Context aware active learning of activity recognition models","author":"Hasan","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0034","series-title":"Proceedings of Asian Conference on Computer 
Vision","first-page":"583","article-title":"Camera motion and surrounding scene appearance as context for action recognition","author":"Heilbron","year":"2014"},{"issue":"10","key":"10.1016\/j.neucom.2019.01.008_bib0035","doi-asserted-by":"crossref","first-page":"2468","DOI":"10.1109\/TPAMI.2013.33","article-title":"Modeling temporal interactions with interval temporal Bayesian networks for complex activity recognition","volume":"35","author":"Zhang","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2019.01.008_bib0036","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"4471","article-title":"Learning temporal embeddings for complex video analysis","author":"Ramanathan","year":"2015"},{"issue":"9","key":"10.1016\/j.neucom.2019.01.008_bib0037","doi-asserted-by":"crossref","first-page":"1770","DOI":"10.1109\/TPAMI.2016.2616308","article-title":"Hierarchical context modeling for video event recognition","volume":"39","author":"Wang","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2019.01.008_bib0038","series-title":"Proceedings of 2011 IEEE Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"3273","article-title":"Learning context for collective activity recognition","author":"Choi","year":"2011"},{"key":"10.1016\/j.neucom.2019.01.008_bib0039","unstructured":"M.S. Aliakbarian, F. Saleh, B. Fernando, M. Salzmann, L. Petersson, L. Andersson, Deep Action-and Context-aware Sequence Learning for Activity Recognition and Anticipation, arXiv:1611.05520 (2016)."},{"key":"10.1016\/j.neucom.2019.01.008_bib0040","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2609","article-title":"Human action recognition based on context-dependent graph kernels","author":"Wu","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0041","unstructured":"M. Wang, B. 
Ni, X. Yang, Recurrent Modeling of Interaction Context for Collective Activity Recognition."},{"year":"2009","series-title":"Probabilistic Graphical Models: Principles and Techniques","author":"Koller","key":"10.1016\/j.neucom.2019.01.008_bib0042"},{"key":"10.1016\/j.neucom.2019.01.008_bib0043","series-title":"Proceedings of International Conference on Machine Learning","first-page":"1659","article-title":"Efficient structured prediction with latent variables for general graphical models","author":"Schwing","year":"2012"},{"key":"10.1016\/j.neucom.2019.01.008_bib0044","unstructured":"W. Ping, Q. Liu, A. Ihler, Marginal Structured svm with Hidden Variables, Eprint Arxiv (2014) 190\u2013198."},{"key":"10.1016\/j.neucom.2019.01.008_bib0045","series-title":"Proceedings of Advances in Neural Information Processing Systems","first-page":"1033","article-title":"The concave-convex procedure (cccp)","volume":"2","author":"Yuille","year":"2002"},{"key":"10.1016\/j.neucom.2019.01.008_bib0046","series-title":"Proceedings of Computer Vision in Sports","first-page":"181","article-title":"Action recognition in realistic sports videos","author":"Soomro","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0047","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"Soomro","year":"2012","journal-title":"Computer Science"},{"key":"10.1016\/j.neucom.2019.01.008_bib0048","series-title":"Proceedings of Advances in Neural Information Processing Systems","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"Simonyan","year":"2014"},{"key":"10.1016\/j.neucom.2019.01.008_bib0049","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4694","article-title":"Beyond short snippets: deep networks for video classification","author":"Yue-Hei Ng","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0050","series-title":"Proceedings of 
the IEEE International Conference on Computer Vision","first-page":"4489","article-title":"Learning spatiotemporal features with 3d convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0051","unstructured":"K. Simonyan, A. Zisserman, Very Deep Convolutional Networks for Large-scale Image Recognition, arXiv:1409.1556 (2014)."},{"key":"10.1016\/j.neucom.2019.01.008_bib0052","series-title":"Proceedings of British Machine Vision Conference, BMVC","first-page":"124.1","article-title":"Evaluation of local spatio-temporal features for action recognition","author":"Wang","year":"2009"},{"issue":"7","key":"10.1016\/j.neucom.2019.01.008_bib0053","doi-asserted-by":"crossref","first-page":"1494","DOI":"10.1109\/TMM.2017.2674622","article-title":"A hierarchical spatio-temporal model for human activity recognition","volume":"19","author":"Xu","year":"2017","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2019.01.008_bib0054","series-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"872","article-title":"Max-margin hidden conditional random fields for human action recognition","author":"Wang","year":"2009"},{"key":"10.1016\/j.neucom.2019.01.008_bib0055","series-title":"Proceedings of British Machine Vision Conference, BMVC","first-page":"4","article-title":"Apt: action localization proposals from dense trajectories.","volume":"2","author":"van Gemert","year":"2015"},{"key":"10.1016\/j.neucom.2019.01.008_bib0056","series-title":"Proceedings of European Conference on Computer Vision","first-page":"437","article-title":"Spot on: action localization from pointly-supervised proposals","author":"Mettes","year":"2016"},{"key":"10.1016\/j.neucom.2019.01.008_bib0057","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3280","article-title":"Action localization in videos through context 
walk","author":"Soomro","year":"2015"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231219300189?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231219300189?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,8,7]],"date-time":"2019-08-07T23:37:45Z","timestamp":1565221065000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231219300189"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3]]},"references-count":57,"alternative-id":["S0925231219300189"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2019.01.008","relation":{},"ISSN":["0925-2312"],"issn-type":[{"type":"print","value":"0925-2312"}],"subject":[],"published":{"date-parts":[[2019,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Action recognition and localization with spatial and temporal contexts","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2019.01.008","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2019 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}