{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T05:07:43Z","timestamp":1733634463463,"version":"3.30.1"},"reference-count":44,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1016\/j.patcog.2024.111128","type":"journal-article","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T20:59:11Z","timestamp":1731704351000},"page":"111128","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["FocTrack: Focus attention for visual tracking"],"prefix":"10.1016","volume":"160","author":[{"given":"Jian","family":"Tao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8916-1174","authenticated-orcid":false,"given":"Sixian","family":"Chan","sequence":"additional","affiliation":[]},{"given":"Zhenchao","family":"Shi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6177-3862","authenticated-orcid":false,"given":"Cong","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Shengyong","family":"Chen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"4","key":"10.1016\/j.patcog.2024.111128_b1","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1002\/rob.21494","article-title":"Automatic driving on ill-defined roads: An adaptive, shape-constrained, color-based method","volume":"32","author":"Ososinski","year":"2015","journal-title":"J. Field Robotics"},{"key":"10.1016\/j.patcog.2024.111128_b2","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13598","article-title":"MixFormer: End-to-end tracking with iterative mixed attention","author":"Cui","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109457","article-title":"Adaptive spatial-temporal surrounding-aware correlation filter tracking via ensemble learning","volume":"139","author":"Moorthy","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.111128_b4","series-title":"IEEE Conference on Computer Vision and Pattern Recognition","first-page":"5289","article-title":"Alpha-refine: Boosting tracking performance by precise bounding box estimation","author":"Yan","year":"2021"},{"key":"10.1016\/j.patcog.2024.111128_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109630","article-title":"SiamRank: A siamese based visual tracking network with ranking strategy","volume":"141","author":"Meng","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.111128_b6","series-title":"The IEEE Conference on Computer Vision and Pattern Recognition","first-page":"8126","article-title":"Transformer tracking","author":"Chen","year":"2021"},{"key":"10.1016\/j.patcog.2024.111128_b7","series-title":"2021 IEEE\/CVF International Conference on Computer Vision","first-page":"10428","article-title":"Learning spatio-temporal transformer for visual tracking","author":"Yan","year":"2021"},{"key":"10.1016\/j.patcog.2024.111128_b8","series-title":"9th International Conference on Learning Representations","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.patcog.2024.111128_b9","series-title":"Computer Vision - ECCV 2022 - 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXII","first-page":"341","article-title":"Joint feature learning and relation modeling for tracking: A one-stream framework","volume":"vol. 13682","author":"Ye","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b10","series-title":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","first-page":"905","article-title":"Sparsett: Visual tracking with sparse transformers","author":"Fu","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b11","series-title":"2021 IEEE\/CVF International Conference on Computer Vision","first-page":"9992","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.patcog.2024.111128_b12","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"12114","article-title":"Cswin transformer: A general vision transformer backbone with cross-shaped windows","author":"Dong","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b13","series-title":"The European Conference on Computer Vision","first-page":"146","article-title":"AiATrack: Attention in attention for transformer visual tracking","volume":"vol. 13682","author":"Gao","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b14","series-title":"2019 IEEE\/CVF International Conference on Computer Vision","first-page":"4009","article-title":"Learning the model update for siamese trackers","author":"Zhang","year":"2019"},{"key":"10.1016\/j.patcog.2024.111128_b15","doi-asserted-by":"crossref","unstructured":"G. Bhat, M. Danelljan, L.V. Gool, R. Timofte, Learning discriminative model prediction for tracking, in: The IEEE International Conference on Computer Vision, 2019, pp. 6182\u20136191.","DOI":"10.1109\/ICCV.2019.00628"},{"key":"10.1016\/j.patcog.2024.111128_b16","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"773","article-title":"Target-aware tracking with long-term context attention","author":"He","year":"2023"},{"key":"10.1016\/j.patcog.2024.111128_b17","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11091","article-title":"Not all tokens are equal: Human-centric visual analysis via token clustering transformer","author":"Zeng","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b18","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, la, USA, June 18\u201324, 2022","first-page":"2550","article-title":"CMT-DeepLab: Clustering mask transformers for panoptic segmentation","author":"Yu","year":"2022"},{"year":"2023","series-title":"Image as set of points","author":"Ma","key":"10.1016\/j.patcog.2024.111128_b19"},{"year":"2022","series-title":"BOAT: Bilateral local attention vision transformer","author":"Yu","key":"10.1016\/j.patcog.2024.111128_b20"},{"key":"10.1016\/j.patcog.2024.111128_b21","series-title":"Computer Vision - ECCV 2020 Workshops - Glasgow, UK, August 23\u201328, 2020, Proceedings, Part V","first-page":"653","article-title":"RPT: learning point set representation for siamese visual tracking","volume":"vol. 12539","author":"Ma","year":"2020"},{"key":"10.1016\/j.patcog.2024.111128_b22","series-title":"The European Conference on Computer Vision","first-page":"816","article-title":"Acquisition of localization confidence for accurate object detection","volume":"vol. 11218","author":"Jiang","year":"2018"},{"year":"2022","series-title":"DaViT: Dual attention vision transformers","author":"Ding","key":"10.1016\/j.patcog.2024.111128_b23"},{"key":"10.1016\/j.patcog.2024.111128_b24","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"15979","article-title":"Masked autoencoders are scalable vision learners","author":"He","year":"2022"},{"key":"10.1016\/j.patcog.2024.111128_b25","series-title":"The European Conference on Computer Vision","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.patcog.2024.111128_b26","doi-asserted-by":"crossref","unstructured":"M. Muller, A. Bibi, S. Giancola, S. Alsubaihi, B. Ghanem, Trackingnet: A large-scale dataset and benchmark for object tracking in the wild, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 300\u2013317.","DOI":"10.1007\/978-3-030-01246-5_19"},{"issue":"5","key":"10.1016\/j.patcog.2024.111128_b27","doi-asserted-by":"crossref","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","article-title":"GOT-10k: A large high-diversity benchmark for generic object tracking in the wild","volume":"43","author":"Huang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.111128_b28","doi-asserted-by":"crossref","unstructured":"H. Fan, L. Lin, F. Yang, P. Chu, G. Deng, S. Yu, H. Bai, Y. Xu, C. Liao, H. Ling, Lasot: A high-quality benchmark for large-scale single object tracking, in: The IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 5374\u20135383.","DOI":"10.1109\/CVPR.2019.00552"},{"issue":"3","key":"10.1016\/j.patcog.2024.111128_b29","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"Imagenet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2024.111128_b30","unstructured":"D.P. Kingma, J. Ba, Adam: A Method for Stochastic Optimization, in: Y. Bengio, Y. LeCun (Eds.), 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7\u20139, 2015, Conference Track Proceedings, 2015."},{"key":"10.1016\/j.patcog.2024.111128_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109964","article-title":"Transformer-based visual object tracking via fine-coarse concatenated attention and cross concatenated MLP","volume":"146","author":"Gao","year":"2024","journal-title":"Pattern Recognit."},{"issue":"6","key":"10.1016\/j.patcog.2024.111128_b32","doi-asserted-by":"crossref","first-page":"4129","DOI":"10.1109\/TPAMI.2024.3349519","article-title":"MixFormer: End-to-end tracking with iterative mixed attention","volume":"46","author":"Cui","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.111128_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110705","article-title":"A transformer-based visual object tracker via learning immediate appearance change","volume":"155","author":"Li","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.111128_b34","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"14572","article-title":"SeqTrack: Sequence to sequence learning for visual object tracking","author":"Chen","year":"2023"},{"year":"2023","series-title":"Compact transformer tracker with correlative masked modeling","author":"Song","key":"10.1016\/j.patcog.2024.111128_b35"},{"key":"10.1016\/j.patcog.2024.111128_b36","series-title":"The European Conference on Computer Vision","first-page":"771","article-title":"Ocean: Object-aware anchor-free tracking","author":"Zhang","year":"2020"},{"key":"10.1016\/j.patcog.2024.111128_b37","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7181","article-title":"Probabilistic regression for visual tracking","author":"Danelljan","year":"2020"},{"key":"10.1016\/j.patcog.2024.111128_b38","doi-asserted-by":"crossref","unstructured":"B. Li, W. Wu, Q. Wang, F. Zhang, J. Xing, J. Yan, Siamrpn++: Evolution of siamese visual tracking with very deep networks, in: The IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 4282\u20134291.","DOI":"10.1109\/CVPR.2019.00441"},{"year":"2016","series-title":"Fully-convolutional Siamese networks for object tracking","author":"Bertinetto","key":"10.1016\/j.patcog.2024.111128_b39"},{"key":"10.1016\/j.patcog.2024.111128_b40","series-title":"The European Conference on Computer Vision","first-page":"547","article-title":"The eighth visual object tracking VOT2020 challenge results","volume":"vol. 12539","author":"Kristan","year":"2020"},{"key":"10.1016\/j.patcog.2024.111128_b41","series-title":"The European Conference on Computer Vision","first-page":"445","article-title":"A benchmark and simulator for UAV tracking","volume":"vol. 9905","author":"Mueller","year":"2016"},{"key":"10.1016\/j.patcog.2024.111128_b42","series-title":"IEEE Conference on Computer Vision and Pattern Recognition","first-page":"5289","article-title":"Alpha-refine: Boosting tracking performance by precise bounding box estimation","author":"Yan","year":"2021"},{"issue":"3","key":"10.1016\/j.patcog.2024.111128_b43","first-page":"3072","article-title":"SiamMask: A framework for fast online object tracking and segmentation","volume":"45","author":"Hu","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.111128_b44","unstructured":"T. Ridnik, E.B. Baruch, A. Noy, L. Zelnik, ImageNet-21K Pretraining for the Masses, in: J. Vanschoren, S. Yeung (Eds.), Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, Virtual, 2021."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324008793?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324008793?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:59:33Z","timestamp":1733561973000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320324008793"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":44,"alternative-id":["S0031320324008793"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2024.111128","relation":{},"ISSN":["0031-3203"],"issn-type":[{"type":"print","value":"0031-3203"}],"subject":[],"published":{"date-parts":[[2025,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"FocTrack: Focus attention for visual tracking","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2024.111128","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"111128"}}