{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T05:52:24Z","timestamp":1740117144456,"version":"3.37.3"},"reference-count":49,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61901099","61973069"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1016\/j.imavis.2023.104687","type":"journal-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T00:00:41Z","timestamp":1682467241000},"page":"104687","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Unsupervised video segmentation for multi-view daily action recognition"],"prefix":"10.1016","volume":"134","author":[{"given":"Zhigang","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yin","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Ziyang","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Chunlei","family":"Gao","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2023.104687_b0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.108445","article-title":"Hierarchical domain adaptation with local feature patterns","volume":"124","author":"Wen","year":"2022","journal-title":"Pattern Recogn."},{"key":"10.1016\/j.imavis.2023.104687_b0010","doi-asserted-by":"crossref","first-page":"7941","DOI":"10.3390\/s21237941","article-title":"Human action recognition: A paradigm of best deep learning features selection and serial based extended fusion","volume":"21","author":"Khan","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.imavis.2023.104687_b0015","first-page":"1","article-title":"Action recognition with a multi-view temporal attention network","author":"Sun","year":"2021","journal-title":"Cognit. Comput."},{"key":"10.1016\/j.imavis.2023.104687_b0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2021.104313","article-title":"Incremental human action recognition with dual memory","volume":"116","author":"Gutoski","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2023.104687_b0025","doi-asserted-by":"crossref","first-page":"1633","DOI":"10.1093\/comjnl\/bxz123","article-title":"Improving human action recognition using hierarchical features and multiple classifier ensembles","volume":"64","author":"Bulbul","year":"2021","journal-title":"Comput. J."},{"key":"10.1016\/j.imavis.2023.104687_b0030","doi-asserted-by":"crossref","first-page":"1595","DOI":"10.1007\/s00371-018-1560-4","article-title":"A unified model for human activity recognition using spatial distribution of gradients and difference of Gaussian kernel","volume":"35","author":"Vishwakarma","year":"2019","journal-title":"Vis. Comput."},{"key":"10.1016\/j.imavis.2023.104687_b0035","doi-asserted-by":"crossref","first-page":"1461","DOI":"10.1109\/TCSVT.2014.2382984","article-title":"Cross-view action recognition based on a statistical translation framework","volume":"26","author":"Wang","year":"2014","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2023.104687_b0040","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1049\/cvi2.12086","article-title":"Temporal-enhanced graph convolution network for skeleton-based action recognition","volume":"16","author":"Xie","year":"2022","journal-title":"IET Comput. Vis."},{"key":"10.1016\/j.imavis.2023.104687_b0045","doi-asserted-by":"crossref","first-page":"442","DOI":"10.1016\/j.patrec.2020.01.023","article-title":"Hierarchical attention network for action segmentation","volume":"131","author":"Gammulle","year":"2020","journal-title":"Pattern Recogn. Lett."},{"key":"10.1016\/j.imavis.2023.104687_b0050","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1109\/TMM.2021.3057503","article-title":"Temporal cross-layer correlation mining for action recognition","volume":"24","author":"Zhu","year":"2021","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.imavis.2023.104687_b0055","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1109\/TIP.2021.3129117","article-title":"Feedback graph convolutional network for skeleton-based action recognition","volume":"31","author":"Yang","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2023.104687_b0060","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1016\/j.imavis.2016.11.004","article-title":"Sparse composition of body poses and atomic actions for human activity recognition in RGB-D videos","volume":"59","author":"Lillo","year":"2017","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2023.104687_b0065","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.1109\/TPAMI.2017.2691321","article-title":"Deep multimodal feature analysis for action recognition in rgb+ d videos","volume":"40","author":"Shahroudy","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.imavis.2023.104687_b0070","doi-asserted-by":"crossref","first-page":"2195","DOI":"10.1109\/TMM.2019.2897902","article-title":"Learning composite latent structures for 3D human action representation and recognition","volume":"21","author":"Wei","year":"2019","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.imavis.2023.104687_b0075","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1007\/s11263-015-0859-0","article-title":"MoFAP: A multi-level representation for action recognition","volume":"119","author":"Wang","year":"2016","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2023.104687_b0080","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1016\/j.jvcir.2019.04.001","article-title":"Action recognition using dynamic hierarchical trees","volume":"61","author":"Wang","year":"2019","journal-title":"J. Vis. Commun. Image Represent."},{"key":"10.1016\/j.imavis.2023.104687_b0085","doi-asserted-by":"crossref","first-page":"800","DOI":"10.1109\/TCSVT.2018.2816960","article-title":"ML-HDP: a hierarchical bayesian nonparametric model for recognizing human actions in video","volume":"29","author":"Tu","year":"2018","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2023.104687_b0090","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2021.104333","article-title":"MLRMV: Multi-layer representation for multi-view action recognition","volume":"116","author":"Liu","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2023.104687_b0095","doi-asserted-by":"crossref","first-page":"1230","DOI":"10.1109\/TIP.2017.2765821","article-title":"On space-time filtering framework for matching human actions across different viewpoints","volume":"27","author":"Ulhaq","year":"2017","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2023.104687_b0100","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1016\/j.patrec.2021.04.023","article-title":"T-VLAD: Temporal vector of locally aggregated descriptor for multiview human action recognition","volume":"148","author":"Naeem","year":"2021","journal-title":"Pattern Recogn. Lett."},{"key":"10.1016\/j.imavis.2023.104687_b0105","doi-asserted-by":"crossref","first-page":"6755","DOI":"10.1007\/s11042-015-2606-5","article-title":"View-independent action recognition: A hybrid approach","volume":"75","author":"Hashemi","year":"2016","journal-title":"Multimed. Tools Appl."},{"key":"10.1016\/j.imavis.2023.104687_b0110","series-title":"European Conference on Computer Vision","first-page":"427","article-title":"Multi-view action recognition using cross-view video prediction","author":"Vyas","year":"2020"},{"key":"10.1016\/j.imavis.2023.104687_b0115","doi-asserted-by":"crossref","first-page":"3835","DOI":"10.1109\/TIP.2020.2965299","article-title":"View-invariant deep architecture for human action recognition using two-stream motion and shape temporal dynamics","volume":"29","author":"Dhiman","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2023.104687_b0120","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1016\/j.neucom.2019.12.151","article-title":"Conflux LSTMs network: A novel approach for multi-view action recognition","volume":"435","author":"Ullah","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2023.104687_b0125","doi-asserted-by":"crossref","first-page":"384","DOI":"10.1016\/j.neucom.2021.05.077","article-title":"Cross-modality online distillation for multi-view action recognition","volume":"456","author":"Xu","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2023.104687_b0130","doi-asserted-by":"crossref","first-page":"6109","DOI":"10.1007\/s13369-020-04481-y","article-title":"Multiple batches of motion history images (MB-MHIs) for multi-view human action recognition","volume":"45","author":"Naeem","year":"2020","journal-title":"Arab. J. Sci. Eng."},{"key":"10.1016\/j.imavis.2023.104687_b0135","doi-asserted-by":"crossref","first-page":"5195","DOI":"10.1109\/JSEN.2019.2903645","article-title":"Robust framework for abnormal human action recognition using R-transform and zernike moments in depth videos","volume":"19","author":"Dhiman","year":"2019","journal-title":"IEEE Sens. J."},{"key":"10.1016\/j.imavis.2023.104687_b0140","series-title":"2019 IEEE fifth international conference on multimedia big data (BigMM)","first-page":"225","article-title":"Skeleton-based view invariant deep features for human activity recognition","author":"Dhiman","year":"2019"},{"key":"10.1016\/j.imavis.2023.104687_b0145","doi-asserted-by":"crossref","unstructured":"D. Wang, W. Ouyang, W. Li, D. Xu, Dividing and aggregating network for multi-view action recognition, in: Proceedings of the European Conference on Computer Vision (ECCV), 2018, pp. 451\u2013467.","DOI":"10.1007\/978-3-030-01240-3_28"},{"key":"10.1016\/j.imavis.2023.104687_b0150","series-title":"European Conference on Computer Vision","first-page":"52","article-title":"Human daily action analysis with multi-view and color-depth data","author":"Cheng","year":"2012"},{"key":"10.1016\/j.imavis.2023.104687_b0155","doi-asserted-by":"crossref","unstructured":"Z. Cai, L. Wang, X. Peng, Y. Qiao, Multi-view super vector for action recognition, in: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, 2014, pp. 596\u2013603.","DOI":"10.1109\/CVPR.2014.83"},{"key":"10.1016\/j.imavis.2023.104687_b0160","series-title":"2011 International conference on 3D imaging, modeling, processing, visualization and transmission","first-page":"342","article-title":"3D human action recognition for multi-view camera systems","author":"Holte","year":"2011"},{"key":"10.1016\/j.imavis.2023.104687_b0165","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3434746","article-title":"Joint transferable dictionary learning and view adaptation for multi-view human action recognition","volume":"15","author":"Sun","year":"2021","journal-title":"ACM Trans. Knowl. Discov. Data (TKDD)"},{"key":"10.1016\/j.imavis.2023.104687_b0170","doi-asserted-by":"crossref","first-page":"9280","DOI":"10.1109\/JIOT.2019.2911669","article-title":"Adaptive fusion and category-level dictionary learning model for multiview human action recognition","volume":"6","author":"Gao","year":"2019","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.imavis.2023.104687_b0175","series-title":"Multi-views action recognition on deep learning and K-SVD","volume":"vol. 1176","author":"Wang","year":"2019"},{"key":"10.1016\/j.imavis.2023.104687_b0180","series-title":"2020 Chinese Control and Decision Conference (CCDC)","first-page":"4858","article-title":"Jointly learning multi-view features for human action recognition","author":"Wang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104687_b0185","doi-asserted-by":"crossref","DOI":"10.1016\/j.dsp.2022.103487","article-title":"Task-driven joint dictionary learning model for multi-view human action recognition","volume":"126","author":"Liu","year":"2022","journal-title":"Digital Signal Process."},{"key":"10.1016\/j.imavis.2023.104687_b0190","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1007\/s11704-018-8015-y","article-title":"Soft video parsing by label distribution learning","volume":"13","author":"Ling","year":"2019","journal-title":"Front. Comput. Sci."},{"key":"10.1016\/j.imavis.2023.104687_b0195","doi-asserted-by":"crossref","first-page":"4382","DOI":"10.1109\/TIP.2018.2837386","article-title":"Beyond joints: Learning representations from primitive geometries for skeleton-based action recognition and detection","volume":"27","author":"Wang","year":"2018","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2023.104687_b0200","unstructured":"L. Ding, C. Xu, Tricornet: A hybrid temporal convolutional and recurrent network for video action segmentation, arXiv preprint arXiv:1705.07818, 2017."},{"key":"10.1016\/j.imavis.2023.104687_b0205","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.1109\/TCSVT.2020.2999384","article-title":"Semantic boundary detection with reinforcement learning for continuous sign language recognition","volume":"31","author":"Wei","year":"2020","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2023.104687_b0210","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1109\/TMM.2018.2866370","article-title":"Temporal action localization in untrimmed videos using action pattern trees","volume":"21","author":"Song","year":"2018","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.imavis.2023.104687_b0215","series-title":"2018 25th IEEE International Conference on Image Processing (ICIP)","first-page":"923","article-title":"Unsupervised detection of periodic segments in videos","author":"Panagiotakis","year":"2018"},{"key":"10.1016\/j.imavis.2023.104687_b0220","doi-asserted-by":"crossref","unstructured":"H. Wang, C. Schmid, Action recognition with improved trajectories, in: Proceedings of the IEEE international conference on computer vision, 2013, pp. 3551\u20133558.","DOI":"10.1109\/ICCV.2013.441"},{"key":"10.1016\/j.imavis.2023.104687_b0225","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1007\/s11263-013-0636-x","article-title":"Image classification with the fisher vector: Theory and practice","volume":"105","author":"S\u00e1nchez","year":"2013","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2023.104687_b0230","doi-asserted-by":"crossref","first-page":"2189","DOI":"10.1109\/TPAMI.2013.35","article-title":"Learning and-or templates for object recognition and detection","volume":"35","author":"Si","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.imavis.2023.104687_b0235","doi-asserted-by":"crossref","first-page":"1377","DOI":"10.1007\/s10044-018-0688-1","article-title":"An implementation of optimized framework for action classification using multilayers neural network on selected fused features","volume":"22","author":"Khan","year":"2019","journal-title":"Pattern Anal. Appl."},{"key":"10.1016\/j.imavis.2023.104687_b0240","doi-asserted-by":"crossref","first-page":"2684","DOI":"10.1109\/TPAMI.2019.2916873","article-title":"Ntu rgb+ d 120: A large-scale benchmark for 3d human activity understanding","volume":"42","author":"Liu","year":"2019","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.imavis.2023.104687_b0245","doi-asserted-by":"crossref","first-page":"568","DOI":"10.1016\/j.patcog.2014.08.015","article-title":"Trajectory-based human action segmentation","volume":"48","author":"Santos","year":"2015","journal-title":"Pattern Recogn."}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885623000616?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885623000616?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T13:40:54Z","timestamp":1707831654000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885623000616"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":49,"alternative-id":["S0262885623000616"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2023.104687","relation":{},"ISSN":["0262-8856"],"issn-type":[{"type":"print","value":"0262-8856"}],"subject":[],"published":{"date-parts":[[2023,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Unsupervised video segmentation for multi-view daily action recognition","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2023.104687","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2023 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"104687"}}