{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T13:53:18Z","timestamp":1723729998905},"reference-count":58,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012659","name":"Foundation for Innovative Research Groups of the National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1016\/j.imavis.2021.104144","type":"journal-article","created":{"date-parts":[[2021,2,21]],"date-time":"2021-02-21T07:26:33Z","timestamp":1613892393000},"page":"104144","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":10,"special_numbering":"C","title":["Boundary graph convolutional network for temporal action detection"],"prefix":"10.1016","volume":"109","author":[{"given":"Yaosen","family":"Chen","sequence":"first","affiliation":[]},{"given":"Bing","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Weichen","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Xinhua","family":"Suo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2021.104144_bb0005","series-title":"Advances in Neural Information Processing Systems","first-page":"3059","article-title":"Weakly supervised dense event captioning in videos","author":"Duan","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0010","article-title":"Untrimmed video classification for activity detection: submission to activitynet challenge","volume":"6","author":"Singh","year":"2016","journal-title":"ActivityNet Large Scale Activity Recognition Challenge"},{"key":"10.1016\/j.imavis.2021.104144_bb0015","series-title":"The IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1","article-title":"End-to-end learning of motion representation for video understanding","author":"Fan","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0020","series-title":"CVPR","first-page":"5589","article-title":"Geometry guided convolutional neural networks for self-supervised video representation learning","author":"Gan","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0025","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","article-title":"Bsn: boundary sensitive network for temporal action proposal generation","author":"Lin","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0030","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3889","article-title":"Bmn: Boundary-matching network for temporal action proposal generation","author":"Lin","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0035","series-title":"AAAI","first-page":"11499","article-title":"Fast learning of temporal action proposal via dense boundary generator","author":"Lin","year":"2020"},{"key":"10.1016\/j.imavis.2021.104144_bb0040","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"7094","article-title":"Graph convolutional networks for temporal action localization","author":"Zeng","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0045","series-title":"Computer Vision and Pattern Recognition (CVPR), 2011 IEEE Conference on, pages 3169\u20133176","first-page":"2","article-title":"Action recognition by dense trajectories","author":"Wang","year":"2011"},{"key":"10.1016\/j.imavis.2021.104144_bb0050","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3551","article-title":"Action recognition with improved trajectories","author":"Wang","year":"2013"},{"key":"10.1016\/j.imavis.2021.104144_bb0055","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"Simonyan","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"10.1016\/j.imavis.2021.104144_bb0060","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1933","article-title":"Convolutional two-stream network fusion for video action recognition","author":"Feichtenhofer","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0065","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"4489","article-title":"Learning spatiotemporal features with 3d convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.imavis.2021.104144_bb0070","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"5533","article-title":"Learning spatio-temporal representation with pseudo-3d residual networks","author":"Qiu","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0075","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6450","article-title":"A closer look at spatiotemporal convolutions for action recognition","author":"Tran","year":"2018"},{"issue":"07","key":"10.1016\/j.imavis.2021.104144_bb0080","first-page":"11948","article-title":"Slowfast networks for video recognition","volume":"34","author":"Feichtenhofer","year":"2018","journal-title":"arXiv preprint"},{"key":"10.1016\/j.imavis.2021.104144_bb0085","first-page":"1","article-title":"Using efficient group pseudo-3D network to learn spatio-temporal features","author":"Chen","year":"2020","journal-title":"Signal, Image and Video Processing"},{"key":"10.1016\/j.imavis.2021.104144_bb0090","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence 34.7","first-page":"11948","article-title":"Conquering the CNN over-parameterization dilemma: a Volterra filtering approach for action recognition","author":"Roheda","year":"2020"},{"key":"10.1016\/j.imavis.2021.104144_bb0095","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6299","article-title":"Quo vadis, action recognition? a new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0100","series-title":"Cuhk & ethz & siat Submission to Activitynet Challenge","author":"Xiong","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0105","series-title":"In Proceedings of the European Conference on Computer Vision","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"Wang","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0110","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"DongangWang, and Shih-Fu Chang. Temporal action localization in untrimmed videos via multi-stage cnns","author":"Zheng","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0115","series-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)","article-title":"Temporal action detection with structured segment networks","author":"Zhao","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0120","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Rethinking the faster r-cnn architecture for temporal action localization","author":"Chao","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0125","series-title":"Proceedings of the IEEE conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Sst: Single-stream temporal action proposals","author":"Buch","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0130","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","article-title":"Fast temporal activity proposals for efficient detection of human actions in untrimmed videos","author":"Heilbron","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0135","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","article-title":"Daps: Deep action proposals for action understanding","author":"Escorcia","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0140","series-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)","article-title":"Turn tap: temporal unit regression network for temporal action proposals","author":"Gao","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0145","series-title":"International Conference on Learning Representations","first-page":"3","article-title":"Kipf and max welling. Semi-supervised classification with graph convolutional networks","volume":"2","author":"Thomas","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0150","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4100","article-title":"Learning graph structure for multi-label image classification via clique generation","author":"Tan","year":"2015"},{"key":"10.1016\/j.imavis.2021.104144_bb0155","series-title":"Mesh r-cnn. arXiv preprint","author":"Gkioxari","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0160","series-title":"ACM Transactions on Graphics","article-title":"Dynamic graph cnn for learning on point clouds","author":"Wang","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0165","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/j.neucom.2020.03.086","article-title":"Point clouds learning with attention-based graph convolution networks","volume":"402","author":"Xie","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2021.104144_bb0170","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Learning video representations from correspondence proposals","author":"Liu","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0175","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","article-title":"Videos as space-time region graphs","author":"Wang","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0180","series-title":"The IEEE International Conference on Computer Vision (ICCV)","article-title":"Deepgcns: Can gcns go as deep as cnns?","author":"Li","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0185","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10156","article-title":"G-TAD: sub-graph localization for temporal action detection","author":"Xu","year":"2020"},{"key":"10.1016\/j.imavis.2021.104144_bb0190","series-title":"AAAI","first-page":"4626","article-title":"Graph attention based proposal 3D convnets for action detection","author":"Li","year":"2020"},{"key":"10.1016\/j.imavis.2021.104144_bb0195","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1","article-title":"Going deeper with convolutions","author":"Szegedy","year":"2015"},{"key":"10.1016\/j.imavis.2021.104144_bb0200","series-title":"International conference on machine learning. PMLR","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"Ioffe","year":"2015"},{"key":"10.1016\/j.imavis.2021.104144_bb0205","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2818","article-title":"Rethinking the inception architecture for computer vision","author":"Szegedy","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0210","series-title":"International Conference on Learning Representations","article-title":"Dropedge: towards deep graph convolutional networks on node classification","author":"Rong","year":"2019"},{"key":"10.1016\/j.imavis.2021.104144_bb0215","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"5561","article-title":"Soft-NMS\u2013improving object detection with one line of code","author":"Bodla","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0220","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"961","article-title":"Activitynet: a large-scale video benchmark for human activity understanding","author":"Heilbron","year":"2015"},{"key":"10.1016\/j.imavis.2021.104144_bb0225","author":"Jiang"},{"key":"10.1016\/j.imavis.2021.104144_bb0230","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"Wang","year":"2016"},{"key":"10.1016\/j.imavis.2021.104144_bb0235","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"5734","article-title":"Cdc: Convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos[C]","author":"Zisserman","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0240","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Cdc: convolutional-deconvolutional networks for precise temporal action localization in untrimmed videos","author":"Zheng","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0245","series-title":"Proceedings of the British Machine Vision Conference (BMVC)","article-title":"Cascaded boundary regression for temporal action detection","author":"Gao","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0250","series-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)","article-title":"Temporal context network for activity localization in videos","author":"Dai","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0255","series-title":"Computing Research Repository (CoRR)","article-title":"Multi-granularity generator for temporal action proposal","author":"Liu","year":"2018"},{"key":"10.1016\/j.imavis.2021.104144_bb0260","series-title":"The British Machine Vision Conference (BMVC)","article-title":"End-to-end, single-stream temporal action detection in untrimmed videos","author":"Buch","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0265","series-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","article-title":"R-c3d: region convolutional 3d network for temporal activity detection","author":"Xu","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0270","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6","article-title":"Untrimmednets for weakly supervised action recognition and detection","author":"LiminWang","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0275","series-title":"Advances in Neural Information Processing Systems","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.imavis.2021.104144_bb0280","series-title":"European Conference on Computer Vision","first-page":"213","article-title":"End-to-End object detection with transformers","author":"Carion","year":"2020"},{"key":"10.1016\/j.imavis.2021.104144_bb0285","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint"},{"key":"10.1016\/j.imavis.2021.104144_bb0290","article-title":"Deformable DETR: deformable transformers for end-to-end object detection","author":"Zhu","year":"2020","journal-title":"arXiv preprint"}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885621000494?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885621000494?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,1,20]],"date-time":"2023-01-20T21:59:36Z","timestamp":1674251976000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885621000494"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5]]},"references-count":58,"alternative-id":["S0262885621000494"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2021.104144","relation":{},"ISSN":["0262-8856"],"issn-type":[{"value":"0262-8856","type":"print"}],"subject":[],"published":{"date-parts":[[2021,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Boundary graph convolutional network for temporal action detection","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2021.104144","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2021 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"104144"}}