{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T08:40:16Z","timestamp":1733733616749,"version":"3.30.1"},"reference-count":68,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100007219","name":"Shanghai Municipal Natural Science Foundation","doi-asserted-by":"publisher","award":["21ZR1426500"],"id":[{"id":"10.13039\/100007219","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002383","name":"King Saud University","doi-asserted-by":"publisher","award":["RSP2024R12"],"id":[{"id":"10.13039\/501100002383","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100016807","name":"Natural Science Foundation of Shenyang Municipality","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016807","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52331012"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1016\/j.eswa.2024.125658","type":"journal-article","created":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T20:55:38Z","timestamp":1731444938000},"page":"125658","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["LRCN: Layer-residual Co-Attention Networks for visual question answering"],"prefix":"10.1016","volume":"263","author":[{"given":"Dezhi","family":"Han","sequence":"first","affiliation":[]},{"given":"Jingya","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Jiahao","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Huafeng","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yachao","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Ling-Huey","family":"Li","sequence":"additional","affiliation":[]},{"given":"Muhammad Khurram","family":"Khan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1381-4364","authenticated-orcid":false,"given":"Kuan-Ching","family":"Li","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2024.125658_b1","series-title":"2018 IEEE conference on computer vision and pattern recognition","first-page":"6077","article-title":"Bottom-up and top-down attention for image captioning and visual question answering","author":"Anderson","year":"2018"},{"key":"10.1016\/j.eswa.2024.125658_b2","article-title":"Layer normalization","author":"Ba","year":"2016","journal-title":"CoRR"},{"key":"10.1016\/j.eswa.2024.125658_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.126991","article-title":"Confidence-based interactable neural-symbolic visual question answering","volume":"564","author":"Bao","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.eswa.2024.125658_b4","series-title":"Computer vision - ECCV 2020 - 16th European conference, glasgow, UK, August 23-28, 2020, proceedings, part i","first-page":"213","article-title":"End-to-end object detection with transformers","volume":"vol. 12346","author":"Carion","year":"2020"},{"key":"10.1016\/j.eswa.2024.125658_b5","series-title":"2021 IEEE\/CVF international conference on computer vision","first-page":"387","article-title":"Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers","author":"Chefer","year":"2021"},{"key":"10.1016\/j.eswa.2024.125658_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.108980","article-title":"CAAN: Context-aware attention network for visual question answering","volume":"132","author":"Chen","year":"2022","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.eswa.2024.125658_b7","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.110084","article-title":"MPCCT: multimodal vision-language learning paradigm with context-based compact transformer","volume":"147","author":"Chen","year":"2024","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.eswa.2024.125658_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110706","article-title":"CLVIN: complete language-vision interaction network for visual question answering","volume":"275","author":"Chen","year":"2023","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2024.125658_b9","series-title":"Proceedings of the 2020 conference on empirical methods in natural language processing","first-page":"8785","article-title":"X-LXMERT: paint, caption and answer questions with multi-modal transformers","author":"Cho","year":"2020"},{"key":"10.1016\/j.eswa.2024.125658_b10","series-title":"2020 IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10575","article-title":"Meshed-memory transformer for image captioning","author":"Cornia","year":"2020"},{"key":"10.1016\/j.eswa.2024.125658_b11","series-title":"2021 IEEE\/CVF international conference on computer vision","first-page":"1749","article-title":"Transvg: End-to-end visual grounding with transformers","author":"Deng","year":"2021"},{"key":"10.1016\/j.eswa.2024.125658_b12","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.eswa.2024.125658_b13","doi-asserted-by":"crossref","first-page":"398","DOI":"10.1007\/s11263-018-1116-0","article-title":"Making the V in VQA matter: Elevating the role of image understanding in visual question answering","volume":"127","author":"Goyal","year":"2019","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.eswa.2024.125658_b14","doi-asserted-by":"crossref","first-page":"6730","DOI":"10.1109\/TIP.2021.3097180","article-title":"Re-attention for visual question answering","volume":"30","author":"Guo","year":"2021","journal-title":"IEEE Transactions on Image Processing"},{"issue":"1","key":"10.1016\/j.eswa.2024.125658_b15","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1109\/TDSC.2020.2977646","article-title":"A traceable and revocable ciphertext-policy attribute-based encryption scheme based on privacy protection","volume":"19","author":"Han","year":"2020","journal-title":"IEEE Transactions on Dependable and Secure Computing"},{"issue":"4","key":"10.1016\/j.eswa.2024.125658_b16","doi-asserted-by":"crossref","first-page":"549","DOI":"10.1007\/s11235-023-01059-5","article-title":"LMCA: a lightweight anomaly network traffic detection model integrating adjusted mobilenet and coordinate attention mechanism for IoT","volume":"84","author":"Han","year":"2023","journal-title":"Telecommunication Systems"},{"issue":"5","key":"10.1016\/j.eswa.2024.125658_b17","doi-asserted-by":"crossref","first-page":"3530","DOI":"10.1109\/TII.2021.3114621","article-title":"A blockchain-based auditable access control system for private data in service-centric IoT environments","volume":"18","author":"Han","year":"2021","journal-title":"IEEE Transactions on Industrial Informatics"},{"year":"2020","series-title":"Realformer: Transformer likes residual attention","author":"He","key":"10.1016\/j.eswa.2024.125658_b18"},{"key":"10.1016\/j.eswa.2024.125658_b19","series-title":"2016 IEEE conference on computer vision and pattern recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"issue":"8","key":"10.1016\/j.eswa.2024.125658_b20","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Computation"},{"issue":"2","key":"10.1016\/j.eswa.2024.125658_b21","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1007\/s11235-022-00943-w","article-title":"A localization algorithm for DV-hop wireless sensor networks based on manhattan distance","volume":"81","author":"Huang","year":"2022","journal-title":"Telecommunication Systems"},{"key":"10.1016\/j.eswa.2024.125658_b22","series-title":"Proceedings of the 32nd international conference on machine learning","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume":"vol. 37","author":"Ioffe","year":"2015"},{"key":"10.1016\/j.eswa.2024.125658_b23","series-title":"2020 IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10264","article-title":"In defense of grid features for visual question answering","author":"Jiang","year":"2020"},{"key":"10.1016\/j.eswa.2024.125658_b24","series-title":"2017 IEEE conference on computer vision and pattern recognition","first-page":"1988","article-title":"CLEVR: a diagnostic dataset for compositional language and elementary visual reasoning","author":"Johnson","year":"2017"},{"year":"2014","series-title":"Adam: A method for stochastic optimization","author":"Kingma","key":"10.1016\/j.eswa.2024.125658_b25"},{"key":"10.1016\/j.eswa.2024.125658_b26","article-title":"Co-training transformer for remote sensing image classification, segmentation and detection","author":"Li","year":"2024","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"10.1016\/j.eswa.2024.125658_b27","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102000","article-title":"Context-aware multi-level question embedding fusion for visual question answering","volume":"102","author":"Li","year":"2024","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2024.125658_b28","article-title":"A secure data storage and sharing scheme for port supply chain based on blockchain and dynamic searchable encryption","author":"Li","year":"2024","journal-title":"Computer Standards Interfaces"},{"key":"10.1016\/j.eswa.2024.125658_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.csi.2024.103887","article-title":"A secure data storage and sharing scheme for port supply chain based on blockchain and dynamic searchable encryption","volume":"91","author":"Li","year":"2025","journal-title":"Computer Standards & Interfaces"},{"key":"10.1016\/j.eswa.2024.125658_b30","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2023.129504","article-title":"A multi-head attention mechanism aided hybrid network for identifying batteries\u2019 state of charge","volume":"286","author":"Li","year":"2024","journal-title":"Energy"},{"key":"10.1016\/j.eswa.2024.125658_b31","series-title":"IEEE INFOCOM 2023 - IEEE conference on computer communications","first-page":"1","article-title":"LightNestle: Quick and accurate neural sequential tensor completion via meta learning","author":"Li","year":"2023"},{"key":"10.1016\/j.eswa.2024.125658_b32","series-title":"Computer vision - ACCV 2022 - 16th Asian conference on computer vision, macao, China, December 4-8, 2022, proceedings, part IV","first-page":"658","article-title":"Two-stage multimodality fusion for high-performance text-based visual question answering","volume":"vol. 13844","author":"Li","year":"2022"},{"key":"10.1016\/j.eswa.2024.125658_b33","series-title":"Computer vision - ECCV 2020 - 16th European conference, glasgow, UK, August 23-28, 2020, proceedings, part XXX","first-page":"121","article-title":"Oscar: Object-semantics aligned pre-training for vision-language tasks","volume":"vol. 12375","author":"Li","year":"2020"},{"issue":"8","key":"10.1016\/j.eswa.2024.125658_b34","doi-asserted-by":"crossref","first-page":"5087","DOI":"10.1109\/TII.2021.3116085","article-title":"Variational few-shot learning for microservice-oriented intrusion detection in distributed industrial IoT","volume":"18","author":"Liang","year":"2022","journal-title":"IEEE Transactions on Industrial Informatics"},{"issue":"8","key":"10.1016\/j.eswa.2024.125658_b35","doi-asserted-by":"crossref","first-page":"8431","DOI":"10.1109\/TITS.2022.3156266","article-title":"Spatial-temporal aware inductive graph neural network for C-ITS data recovery","volume":"24","author":"Liang","year":"2023","journal-title":"IEEE Transactions on Intelligence Transport System"},{"key":"10.1016\/j.eswa.2024.125658_b36","series-title":"Computer vision - ECCV 2014 - 13th European conference, zurich, Switzerland, September 6-12, 2014, proceedings, part v","first-page":"740","article-title":"Microsoft COCO: common objects in context","volume":"vol. 8693","author":"Lin","year":"2014"},{"key":"10.1016\/j.eswa.2024.125658_b37","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/j.aiopen.2022.10.001","article-title":"A survey of transformers","volume":"3","author":"Lin","year":"2022","journal-title":"AI Open"},{"issue":"2","key":"10.1016\/j.eswa.2024.125658_b38","doi-asserted-by":"crossref","first-page":"1747","DOI":"10.1109\/TII.2022.3204034","article-title":"A regularized cross-layer ladder network for intrusion detection in industrial internet of things","volume":"19","author":"Long","year":"2023","journal-title":"IEEE Transactions on Industrial Informatics"},{"issue":"1","key":"10.1016\/j.eswa.2024.125658_b39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.4018\/IJIIT.318671","article-title":"Object-assisted question featurization and multi-CNN image feature fusion for visual question answering","volume":"19","author":"Manmadhan","year":"2023","journal-title":"International Journal of Intelligence and Information Technology"},{"key":"10.1016\/j.eswa.2024.125658_b40","doi-asserted-by":"crossref","first-page":"6997","DOI":"10.1109\/TMM.2022.3216770","article-title":"Positional attention guided transformer-like architecture for visual question answering","volume":"25","author":"Mao","year":"2022","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.eswa.2024.125658_b41","series-title":"IEEE\/CVF conference on computer vision and pattern recognition workshops","first-page":"4557","article-title":"Coarse-to-fine reasoning for visual question answering","author":"Nguyen","year":"2022"},{"key":"10.1016\/j.eswa.2024.125658_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.101868","article-title":"Openvivqa: Task, dataset, and multimodal fusion models for visual question answering in vietnamese","volume":"100","author":"Nguyen","year":"2023","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2024.125658_b43","first-page":"1532","article-title":"Glove: Global vectors for word representation","author":"Pennington","year":"2014"},{"key":"10.1016\/j.eswa.2024.125658_b44","doi-asserted-by":"crossref","first-page":"4282","DOI":"10.1109\/TMM.2022.3173131","article-title":"Deep residual weight-sharing attention network with low-rank attention for visual question answering","volume":"25","author":"Qin","year":"2023","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.eswa.2024.125658_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2022.104173","article-title":"Rtunet: Residual transformer unet specifically for pancreas segmentation","volume":"79","author":"Qiu","year":"2023","journal-title":"Biomedical Signal Processing and Control"},{"key":"10.1016\/j.eswa.2024.125658_b46","doi-asserted-by":"crossref","unstructured":"Rahman, T., Chou, S.-H., Sigal, L., & Carenini, G. (2021). An improved attention for visual question answering. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1653\u20131662).","DOI":"10.1109\/CVPRW53098.2021.00181"},{"issue":"13","key":"10.1016\/j.eswa.2024.125658_b47","doi-asserted-by":"crossref","first-page":"16706","DOI":"10.1007\/s10489-022-04355-w","article-title":"Local self-attention in transformer for visual question answering","volume":"53","author":"Shen","year":"2023","journal-title":"Applied Intelligence: The International Journal of Artificial Intelligence, Neural Networks, and Complex Problem-Solving Technologies"},{"issue":"6","key":"10.1016\/j.eswa.2024.125658_b48","doi-asserted-by":"crossref","first-page":"5062","DOI":"10.1007\/s10489-024-05437-7","article-title":"Relational reasoning and adaptive fusion for visual question answering","volume":"54","author":"Shen","year":"2024","journal-title":"Applied Intelligence: The International Journal of Artificial Intelligence, Neural Networks, and Complex Problem-Solving Technologies"},{"key":"10.1016\/j.eswa.2024.125658_b49","first-page":"1","article-title":"Vman: visual-modified attention network for multimodal paradigms","author":"Song","year":"2024","journal-title":"Visual Computer"},{"key":"10.1016\/j.eswa.2024.125658_b50","series-title":"IEEE\/CVF conference on computer vision and pattern recognition","first-page":"2648","article-title":"Multimodal integration of human-like attention in visual question answering","author":"Sood","year":"2023"},{"issue":"1","key":"10.1016\/j.eswa.2024.125658_b51","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"year":"2022","series-title":"B2t connection: Serving stability and performance in deep transformers","author":"Takase","key":"10.1016\/j.eswa.2024.125658_b52"},{"key":"10.1016\/j.eswa.2024.125658_b53","series-title":"Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing","first-page":"5099","article-title":"LXMERT: learning cross-modality encoder representations from transformers","author":"Tan","year":"2019"},{"key":"10.1016\/j.eswa.2024.125658_b54","series-title":"2018 IEEE conference on computer vision and pattern recognition","first-page":"4223","article-title":"Tips and tricks for visual question answering: Learnings from the 2017 challenge","author":"Teney","year":"2018"},{"key":"10.1016\/j.eswa.2024.125658_b55","series-title":"Advances in neural information processing systems 30: annual conference on neural information processing systems 2017","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.eswa.2024.125658_b56","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106110","article-title":"EMAT: Efficient feature fusion network for visual tracking via optimized multi-head attention","volume":"172","author":"Wang","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.eswa.2024.125658_b57","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.117174","article-title":"Geometry attention transformer with position-aware LSTMs for image captioning","volume":"201","author":"Wang","year":"2022","journal-title":"Expert Systems with Applications"},{"issue":"1","key":"10.1016\/j.eswa.2024.125658_b58","doi-asserted-by":"crossref","first-page":"202","DOI":"10.1007\/s11227-023-05438-2","article-title":"A novel fuzzy control path planning algorithm for intelligent ship based on scale factors","volume":"80","author":"Wu","year":"2024","journal-title":"Journal of Supercomputing"},{"key":"10.1016\/j.eswa.2024.125658_b59","article-title":"ResiDual: Transformer with dual residual connections","author":"Xie","year":"2023","journal-title":"CoRR"},{"key":"10.1016\/j.eswa.2024.125658_b60","series-title":"International conference on machine learning","first-page":"10524","article-title":"On layer normalization in the transformer architecture","author":"Xiong","year":"2020"},{"issue":"9","key":"10.1016\/j.eswa.2024.125658_b61","doi-asserted-by":"crossref","first-page":"3097","DOI":"10.1007\/s00371-022-02524-z","article-title":"SPCA-net: a based on spatial position relationship co-attention network for visual question answering","volume":"38","author":"Yan","year":"2022","journal-title":"Visual Computer"},{"key":"10.1016\/j.eswa.2024.125658_b62","series-title":"IEEE international conference on multimedia and expo","first-page":"1","article-title":"Modality-specific multimodal global enhanced network for text-based visual question answering","author":"Yang","year":"2022"},{"year":"2019","series-title":"Multimodal unified attention networks for vision-and-language interactions","author":"Yu","key":"10.1016\/j.eswa.2024.125658_b63"},{"key":"10.1016\/j.eswa.2024.125658_b64","doi-asserted-by":"crossref","unstructured":"Yu, Z., Cui, Y., Yu, J., Wang, M., Tao, D., & Tian, Q. (2020). Deep multimodal neural architecture search. In Proceedings of the 28th ACM international conference on multimedia (pp. 3743\u20133752).","DOI":"10.1145\/3394171.3413977"},{"key":"10.1016\/j.eswa.2024.125658_b65","series-title":"IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10809","article-title":"MetaFormer is actually what you need for vision","author":"Yu","year":"2022"},{"key":"10.1016\/j.eswa.2024.125658_b66","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., & Tian, Q. (2019). Deep modular co-attention networks for visual question answering. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 6281\u20136290).","DOI":"10.1109\/CVPR.2019.00644"},{"key":"10.1016\/j.eswa.2024.125658_b67","series-title":"2022 IEEE international conference on multimedia and expo","first-page":"1","article-title":"Multi-head attention fusion network for visual question answering","author":"Zhang","year":"2022"},{"key":"10.1016\/j.eswa.2024.125658_b68","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Ren, T., Zhu, C., Sun, X., Liu, J., Ding, X., Xu, M., & Ji, R. (2021). Trar: Routing the attention spans in transformer for visual question answering. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 2074\u20132084).","DOI":"10.1109\/ICCV48922.2021.00208"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417424025259?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417424025259?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T08:08:04Z","timestamp":1733731684000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417424025259"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":68,"alternative-id":["S0957417424025259"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2024.125658","relation":{},"ISSN":["0957-4174"],"issn-type":[{"type":"print","value":"0957-4174"}],"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"LRCN: Layer-residual Co-Attention Networks for visual question answering","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2024.125658","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"125658"}}