{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T05:28:11Z","timestamp":1731562091831,"version":"3.28.0"},"reference-count":76,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272415"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004731","name":"Natural Science Foundation of Zhejiang Province","doi-asserted-by":"publisher","award":["LQ23F020021"],"id":[{"id":"10.13039\/501100004731","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100022963","name":"Key Research and Development Program of Zhejiang Province","doi-asserted-by":"publisher","award":["2023 C01041"],"id":[{"id":"10.13039\/100022963","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1016\/j.knosys.2024.111597","type":"journal-article","created":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T07:14:57Z","timestamp":1709363697000},"page":"111597","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":2,"special_numbering":"C","title":["PATNet: Patch-to-pixel attention-aware transformer network for RGB-D and RGB-T salient object detection"],"prefix":"10.1016","volume":"291","author":[{"given":"Mingfeng","family":"Jiang","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0009-0009-8311-556X","authenticated-orcid":false,"given":"Jianhua","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Jiatong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yaming","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xian","family":"Fang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"10","key":"10.1016\/j.knosys.2024.111597_b1","doi-asserted-by":"crossref","first-page":"4555","DOI":"10.1109\/TIP.2016.2592701","article-title":"Visual tracking via coarse and fine structural local sparse appearance models","volume":"25","author":"Jia","year":"2016","journal-title":"IEEE 
Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b2","doi-asserted-by":"crossref","first-page":"64","DOI":"10.1016\/j.patrec.2018.08.010","article-title":"Visual saliency guided complex image retrieval","volume":"130","author":"Wang","year":"2020","journal-title":"Pattern Recognit. Lett."},{"key":"10.1016\/j.knosys.2024.111597_b3","doi-asserted-by":"crossref","first-page":"694","DOI":"10.1109\/TIP.2019.2928144","article-title":"Re-caption: Saliency-enhanced image captioning through two-phase learning","volume":"29","author":"Zhou","year":"2019","journal-title":"IEEE Trans. Image Process."},{"issue":"7","key":"10.1016\/j.knosys.2024.111597_b4","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1109\/TVCG.2018.2889055","article-title":"Saliency-aware texture smoothing","volume":"26","author":"Zhu","year":"2018","journal-title":"IEEE Trans. Vis. Comput. Graphics"},{"key":"10.1016\/j.knosys.2024.111597_b5","series-title":"CVPR 2011","first-page":"2129","article-title":"From co-saliency to co-segmentation: An efficient and fully unsupervised energy minimization model","author":"Chang","year":"2011"},{"key":"10.1016\/j.knosys.2024.111597_b6","doi-asserted-by":"crossref","first-page":"378","DOI":"10.1016\/j.neucom.2013.09.021","article-title":"Integration of the saliency-based seed extraction and random walks for image segmentation","volume":"129","author":"Qin","year":"2014","journal-title":"Neurocomputing"},{"issue":"3","key":"10.1016\/j.knosys.2024.111597_b7","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/j.image.2012.11.008","article-title":"Video abstraction based on the visual attention model and online clustering","volume":"28","author":"Ji","year":"2013","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2024.111597_b8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TMM.2023.3340892","article-title":"Transformer fusion and pixel-level contrastive learning for RGB-D salient object detection","author":"Wu","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"7","key":"10.1016\/j.knosys.2024.111597_b9","doi-asserted-by":"crossref","first-page":"4486","DOI":"10.1109\/TCSVT.2021.3127149","article-title":"SwinNet: Swin transformer drives edge-aware RGB-D and RGB-T salient object detection","volume":"32","author":"Liu","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"2","key":"10.1016\/j.knosys.2024.111597_b10","doi-asserted-by":"crossref","first-page":"728","DOI":"10.1109\/TCSVT.2022.3202563","article-title":"HRTransNet: Hrformer-driven two-modality salient object detection","volume":"33","author":"Tang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2024.111597_b11","doi-asserted-by":"crossref","first-page":"892","DOI":"10.1109\/TIP.2023.3234702","article-title":"CAVER: Cross-modal view-mixed transformer for bi-modal salient object detection","volume":"32","author":"Pang","year":"2023","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b12","doi-asserted-by":"crossref","unstructured":"Z. Liu, Y. Wang, Z. Tu, Y. Xiao, B. Tang, TriTransNet: RGB-D salient object detection with a triplet transformer embedding network, in: Proceedings of the 29th ACM International Conference on Multimedia, 2021, pp. 4481\u20134490.","DOI":"10.1145\/3474085.3475601"},{"key":"10.1016\/j.knosys.2024.111597_b13","doi-asserted-by":"crossref","unstructured":"N. Liu, N. Zhang, K. Wan, L. Shao, J. 
Han, Visual saliency transformer, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 4722\u20134732.","DOI":"10.1109\/ICCV48922.2021.00468"},{"key":"10.1016\/j.knosys.2024.111597_b14","first-page":"1","article-title":"CATNet: A cascaded and aggregated transformer network for RGB-D salient object detection","author":"Sun","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.knosys.2024.111597_b15","first-page":"1","article-title":"3-D convolutional neural networks for RGB-D salient object detection and beyond","author":"Chen","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2024.111597_b16","first-page":"12321","article-title":"F3Net: Fusion, feedback and focus for salient object detection","volume":"vol. 34","author":"Wei","year":"2020"},{"key":"10.1016\/j.knosys.2024.111597_b17","doi-asserted-by":"crossref","first-page":"3528","DOI":"10.1109\/TIP.2021.3062689","article-title":"Hierarchical alternate interaction network for RGB-D salient object detection","volume":"30","author":"Li","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b18","doi-asserted-by":"crossref","first-page":"1285","DOI":"10.1109\/TIP.2022.3140606","article-title":"Learning discriminative cross-modality features for RGB-D saliency detection","volume":"31","author":"Wang","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b19","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109139","article-title":"M2RNet: Multi-modal and multi-scale refined network for RGB-D salient object detection","volume":"135","author":"Fang","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2024.111597_b20","doi-asserted-by":"crossref","first-page":"3125","DOI":"10.1109\/TIP.2022.3164550","article-title":"EDN: Salient object detection via extremely-downsampled network","volume":"31","author":"Wu","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b21","doi-asserted-by":"crossref","first-page":"2192","DOI":"10.1109\/TMM.2021.3077767","article-title":"CCAFNet: Crossflow and cross-scale adaptive fusion network for detecting salient objects in RGB-D images","volume":"24","author":"Zhou","year":"2021","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.knosys.2024.111597_b22","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.108372","article-title":"LC3Net: Ladder context correlation complementary network for salient object detection","volume":"242","author":"Fang","year":"2022","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2024.111597_b23","doi-asserted-by":"crossref","unstructured":"J. Wei, S. Wang, Z. Wu, C. Su, Q. Huang, Q. Tian, Label decoupling framework for salient object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 13025\u201313034.","DOI":"10.1109\/CVPR42600.2020.01304"},{"issue":"1","key":"10.1016\/j.knosys.2024.111597_b24","doi-asserted-by":"crossref","first-page":"887","DOI":"10.1109\/TPAMI.2021.3140168","article-title":"PoolNet+: Exploring the potential of pooling for salient object detection","volume":"45","author":"Liu","year":"2023","journal-title":"IEEE TPAMI"},{"key":"10.1016\/j.knosys.2024.111597_b25","doi-asserted-by":"crossref","unstructured":"J.-J. Liu, Q. Hou, M.-M. Cheng, J. Feng, J. 
Jiang, A Simple Pooling-Based Design for Real-Time Salient Object Detection, in: IEEE CVPR, 2019, pp. 3917\u20133926.","DOI":"10.1109\/CVPR.2019.00404"},{"issue":"3","key":"10.1016\/j.knosys.2024.111597_b26","first-page":"3738","article-title":"Salient object detection via integrity learning","volume":"45","author":"Zhuge","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2024.111597_b27","doi-asserted-by":"crossref","unstructured":"Y. Wang, R. Wang, X. Fan, T. Wang, X. He, Pixels, Regions, and Objects: Multiple Enhancement for Salient Object Detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 10031\u201310040.","DOI":"10.1109\/CVPR52729.2023.00967"},{"issue":"12","key":"10.1016\/j.knosys.2024.111597_b28","doi-asserted-by":"crossref","first-page":"8128","DOI":"10.1109\/TCSVT.2021.3102944","article-title":"Dense attentive feature enhancement for salient object detection","volume":"32","author":"Li","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2024.111597_b29","first-page":"12993","article-title":"TRACER: Extreme attention guided salient object tracing network (student abstract)","volume":"vol. 36","author":"Lee","year":"2022"},{"key":"10.1016\/j.knosys.2024.111597_b30","doi-asserted-by":"crossref","first-page":"4873","DOI":"10.1109\/TIP.2020.2976689","article-title":"Icnet: Information conversion network for RGB-D based salient object detection","volume":"29","author":"Li","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b31","doi-asserted-by":"crossref","unstructured":"G. Li, Z. Liu, L. Ye, Y. Wang, H. Ling, Cross-Modal Weighting Network for RGB-D Salient Object Detection, in: European Conference on Computer Vision, ECCV, 2020, pp. 665\u2013681.","DOI":"10.1007\/978-3-030-58520-4_39"},{"key":"10.1016\/j.knosys.2024.111597_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109194","article-title":"Cross-modal hierarchical interaction network for RGB-D salient object detection","volume":"136","author":"Bi","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2024.111597_b33","doi-asserted-by":"crossref","first-page":"5142","DOI":"10.1109\/TMM.2022.3187856","article-title":"C2DFNet: Criss-cross dynamic filter network for RGB-D salient object detection","volume":"25","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.knosys.2024.111597_b34","first-page":"3463","article-title":"Self-supervised pretraining for rgb-d salient object detection","volume":"vol. 36","author":"Zhao","year":"2022"},{"key":"10.1016\/j.knosys.2024.111597_b35","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, J. Uszkoreit, N. Houlsby, An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale, in: 9th International Conference on Learning Representations, 2021."},{"issue":"6","key":"10.1016\/j.knosys.2024.111597_b36","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1109\/TPAMI.1986.4767851","article-title":"A computational approach to edge detection","volume":"PAMI-8","author":"Canny","year":"1986","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2024.111597_b37","doi-asserted-by":"crossref","unstructured":"Z. Liu, Y. Lin, Y. Cao, H. Hu, Y. Wei, Z. Zhang, S. Lin, B. 
Guo, Swin transformer: Hierarchical vision transformer using shifted windows, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"4","key":"10.1016\/j.knosys.2024.111597_b38","doi-asserted-by":"crossref","first-page":"1787","DOI":"10.1109\/TCSVT.2022.3215979","article-title":"Modality-induced transfer-fusion network for RGB-D and RGB-T salient object detection","volume":"33","author":"Chen","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"10.1016\/j.knosys.2024.111597_b39","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","article-title":"Pvt v2: Improved baselines with pyramid vision transformer","volume":"8","author":"Wang","year":"2022","journal-title":"Comput. Vis. Media"},{"key":"10.1016\/j.knosys.2024.111597_b40","first-page":"7281","article-title":"Hrformer: High-resolution vision transformer for dense predict","volume":"34","author":"Yuan","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2024.111597_b41","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"2877","article-title":"Pyramidal attention for saliency detection","author":"Hussain","year":"2022"},{"key":"10.1016\/j.knosys.2024.111597_b42","doi-asserted-by":"crossref","unstructured":"R. Ranftl, A. Bochkovskiy, V. Koltun, Vision Transformers for Dense Prediction, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, ICCV, 2021, pp. 12179\u201312188.","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"10.1016\/j.knosys.2024.111597_b43","doi-asserted-by":"crossref","first-page":"5678","DOI":"10.1109\/TIP.2021.3087412","article-title":"Multi-interactive dual-decoder for RGB-thermal salient object detection","volume":"30","author":"Tu","year":"2021","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b44","doi-asserted-by":"crossref","first-page":"6971","DOI":"10.1109\/TMM.2022.3216476","article-title":"Does thermal really always matter for RGB-T salient object detection?","volume":"25","author":"Cong","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"11","key":"10.1016\/j.knosys.2024.111597_b45","doi-asserted-by":"crossref","first-page":"7646","DOI":"10.1109\/TCSVT.2022.3184840","article-title":"Cross-collaborative fusion-encoder network for robust RGB-thermal salient object detection","volume":"32","author":"Liao","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2024.111597_b46","doi-asserted-by":"crossref","first-page":"3027","DOI":"10.1109\/TIP.2023.3275538","article-title":"WaveNet: Wavelet network with knowledge distillation for RGB-T salient object detection","volume":"32","author":"Zhou","year":"2023","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b47","doi-asserted-by":"crossref","unstructured":"Z. Liu, X. Huang, G. Zhang, X. Fang, L. Wang, B. Tang, Scribble-Supervised RGB-T Salient Object Detection, in: 2023 IEEE International Conference on Multimedia and Expo, ICME, 2023, pp. 2369\u20132374.","DOI":"10.1109\/ICME55011.2023.00404"},{"key":"10.1016\/j.knosys.2024.111597_b48","doi-asserted-by":"crossref","unstructured":"W. Shi, J. Caballero, F. Husz\u00e1r, J. Totz, A.P. Aitken, R. Bishop, D. Rueckert, Z. 
Wang, Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 1874\u20131883.","DOI":"10.1109\/CVPR.2016.207"},{"key":"10.1016\/j.knosys.2024.111597_b49","doi-asserted-by":"crossref","unstructured":"Y. Piao, W. Ji, J. Li, M. Zhang, H. Lu, Depth-induced multi-scale recurrent attention network for saliency detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 7254\u20137263.","DOI":"10.1109\/ICCV.2019.00735"},{"key":"10.1016\/j.knosys.2024.111597_b50","series-title":"2014 IEEE International Conference on Image Processing","first-page":"1115","article-title":"Depth saliency based on anisotropic center-surround difference","author":"Ju","year":"2014"},{"key":"10.1016\/j.knosys.2024.111597_b51","doi-asserted-by":"crossref","unstructured":"H. Peng, B. Li, W. Xiong, W. Hu, R. Ji, RGBD salient object detection: A benchmark and algorithms, in: Proceedings of the European Conference on Computer Vision, 2014, pp. 92\u2013109.","DOI":"10.1007\/978-3-319-10578-9_7"},{"key":"10.1016\/j.knosys.2024.111597_b52","doi-asserted-by":"crossref","unstructured":"Y. Cheng, H. Fu, X. Wei, J. Xiao, X. Cao, Depth enhanced saliency detection method, in: Proceedings of International Conference on Internet Multimedia Computing and Service, 2014, pp. 23\u201327.","DOI":"10.1145\/2632856.2632866"},{"key":"10.1016\/j.knosys.2024.111597_b53","series-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"454","article-title":"Leveraging stereopsis for saliency analysis","author":"Niu","year":"2012"},{"issue":"5","key":"10.1016\/j.knosys.2024.111597_b54","doi-asserted-by":"crossref","first-page":"2075","DOI":"10.1109\/TNNLS.2020.2996406","article-title":"Rethinking RGB-d salient object detection: Models, data sets, and large-scale benchmarks","volume":"32","author":"Fan","year":"2020","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2024.111597_b55","doi-asserted-by":"crossref","unstructured":"W. Ji, J. Li, M. Zhang, Y. Piao, H. Lu, Accurate RGB-D salient object detection via collaborative learning, in: Proceedings of the European Conference on Computer Vision, 2020, pp. 52\u201369.","DOI":"10.1007\/978-3-030-58523-5_4"},{"key":"10.1016\/j.knosys.2024.111597_b56","doi-asserted-by":"crossref","unstructured":"Y. Piao, Z. Rong, M. Zhang, W. Ren, H. Lu, A2dele: Adaptive and attentive depth distiller for efficient RGB-D salient object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 9060\u20139069.","DOI":"10.1109\/CVPR42600.2020.00908"},{"key":"10.1016\/j.knosys.2024.111597_b57","doi-asserted-by":"crossref","unstructured":"X. Zhao, L. Zhang, Y. Pang, H. Lu, L. Zhang, A single stream network for robust and real-time RGB-D salient object detection, in: Proceedings of the European Conference on Computer Vision, 2020, pp. 646\u2013662.","DOI":"10.1007\/978-3-030-58542-6_39"},{"key":"10.1016\/j.knosys.2024.111597_b58","doi-asserted-by":"crossref","unstructured":"G. Wang, C. Li, Y. Ma, A. Zheng, J. Tang, B. Luo, RGB-T saliency detection benchmark: Dataset, baselines, analysis and a novel approach, in: Proceedings of the Conference on Image and Graphics Technologies and Applications, 2018, pp. 
359\u2013369.","DOI":"10.1007\/978-981-13-1702-6_36"},{"issue":"1","key":"10.1016\/j.knosys.2024.111597_b59","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1109\/TMM.2019.2924578","article-title":"RGB-T image saliency detection via collaborative graph learning","volume":"22","author":"Tu","year":"2019","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.knosys.2024.111597_b60","doi-asserted-by":"crossref","first-page":"4163","DOI":"10.1109\/TMM.2022.3171688","article-title":"RGBT salient object detection: A large-scale dataset and benchmark","volume":"25","author":"Tu","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"12","key":"10.1016\/j.knosys.2024.111597_b61","doi-asserted-by":"crossref","first-page":"5706","DOI":"10.1109\/TIP.2015.2487833","article-title":"Salient object detection: A benchmark","volume":"24","author":"Borji","year":"2015","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2024.111597_b62","doi-asserted-by":"crossref","unstructured":"D.-P. Fan, M.-M. Cheng, Y. Liu, T. Li, A. Borji, Structure-measure: A new way to evaluate foreground maps, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 4548\u20134557.","DOI":"10.1109\/ICCV.2017.487"},{"key":"10.1016\/j.knosys.2024.111597_b63","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1597","article-title":"Frequency-tuned salient region detection","author":"Achanta","year":"2009"},{"key":"10.1016\/j.knosys.2024.111597_b64","doi-asserted-by":"crossref","unstructured":"D.-P. Fan, C. Gong, Y. Cao, B. Ren, M.-M. Cheng, A. Borji, Enhanced-alignment Measure for Binary Foreground Map Evaluation, in: Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence, IJCAI-18, 2018, pp. 698\u2013704.","DOI":"10.24963\/ijcai.2018\/97"},{"key":"10.1016\/j.knosys.2024.111597_b65","series-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"733","article-title":"Saliency filters: Contrast based filtering for salient region detection","author":"Perazzi","year":"2012"},{"article-title":"Adam: A method for stochastic optimization","year":"2015","series-title":"3rd International Conference on Learning Representations","author":"Kingma","key":"10.1016\/j.knosys.2024.111597_b66"},{"issue":"4","key":"10.1016\/j.knosys.2024.111597_b67","doi-asserted-by":"crossref","first-page":"2091","DOI":"10.1109\/TCSVT.2021.3082939","article-title":"Unified information fusion network for multi-modal RGB-D and RGB-T salient object detection","volume":"32","author":"Gao","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2024.111597_b68","first-page":"15448","article-title":"Learning generative vision transformer with energy-based latent space for saliency prediction","volume":"34","author":"Zhang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2024.111597_b69","doi-asserted-by":"crossref","unstructured":"P. Sun, W. Zhang, H. Wang, S. Li, X. Li, Deep RGB-D saliency detection with depth-sensitive attention and automatic multi-modal fusion, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 
1407\u20131417.","DOI":"10.1109\/CVPR46437.2021.00146"},{"key":"10.1016\/j.knosys.2024.111597_b70","doi-asserted-by":"crossref","first-page":"4253","DOI":"10.1109\/TMM.2022.3172852","article-title":"Depth-induced gap-reducing network for RGB-D salient object detection: An interaction, guidance and refinement approach","volume":"25","author":"Cheng","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.knosys.2024.111597_b71","doi-asserted-by":"crossref","unstructured":"M. Lee, C. Park, S. Cho, S. Lee, Spsn: Superpixel prototype sampling network for rgb-d salient object detection, in: Proceedings of the European Conference on Computer Vision, 2022, pp. 630\u2013647.","DOI":"10.1007\/978-3-031-19818-2_36"},{"key":"10.1016\/j.knosys.2024.111597_b72","series-title":"2022 International Conference on 3D Vision","first-page":"403","article-title":"Robust rgb-d fusion for saliency detection","author":"Wu","year":"2022"},{"issue":"5","key":"10.1016\/j.knosys.2024.111597_b73","doi-asserted-by":"crossref","first-page":"2949","DOI":"10.1109\/TCSVT.2021.3099120","article-title":"CGFNet: Cross-guided fusion network for RGB-T salient object detection","volume":"32","author":"Wang","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"5","key":"10.1016\/j.knosys.2024.111597_b74","doi-asserted-by":"crossref","first-page":"3111","DOI":"10.1109\/TCSVT.2021.3102268","article-title":"Efficient context-guided stacked refinement network for RGB-T salient object detection","volume":"32","author":"Huo","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2024.111597_b75","first-page":"1","article-title":"Modal complementary fusion network for RGB-T salient object detection","author":"Ma","year":"2022","journal-title":"Appl. Intell."},{"key":"10.1016\/j.knosys.2024.111597_b76","doi-asserted-by":"crossref","first-page":"3752","DOI":"10.1109\/TIP.2022.3176540","article-title":"Weakly alignment-free RGBT salient object detection with deep correlation network","volume":"31","author":"Tu","year":"2022","journal-title":"IEEE Trans. 
Image Process."}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705124002326?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705124002326?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,11,13]],"date-time":"2024-11-13T18:27:33Z","timestamp":1731522453000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705124002326"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":76,"alternative-id":["S0950705124002326"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2024.111597","relation":{},"ISSN":["0950-7051"],"issn-type":[{"type":"print","value":"0950-7051"}],"subject":[],"published":{"date-parts":[[2024,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"PATNet: Patch-to-pixel attention-aware transformer network for RGB-D and RGB-T salient object detection","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2024.111597","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"111597"}}