{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T13:12:21Z","timestamp":1724764341718},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2021,3,1]],"date-time":"2021-03-01T00:00:00Z","timestamp":1614556800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1016\/j.neucom.2020.10.046","type":"journal-article","created":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T06:11:28Z","timestamp":1603519888000},"page":"14-23","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":20,"special_numbering":"C","title":["MSCAN: Multimodal Self-and-Collaborative Attention Network for image aesthetic prediction tasks"],"prefix":"10.1016","volume":"430","author":[{"given":"Xiaodan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Xinbo","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Lihuo","family":"He","sequence":"additional","affiliation":[]},{"given":"Wen","family":"Lu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7","key":"10.1016\/j.neucom.2020.10.046_b0005","doi-asserted-by":"crossref","first-page":"1480","DOI":"10.1109\/TMM.2013.2268051","article-title":"Aesthetic image enhancement by dependence-aware object recomposition","volume":"15","author":"Zhang","year":"2013","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2020.10.046_b0010","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"8193","article-title":"A2-rl: aesthetics aware reinforcement learning for image cropping","author":"Li","year":"2018"},{"key":"10.1016\/j.neucom.2020.10.046_b0015","doi-asserted-by":"crossref","unstructured":"R. Datta, D. Joshi, J. Li, J. Z. Wang, Studying aesthetics in photographic images using a computational approach, in: Proceedings of European Conference on Computer Vision, 2006, pp. 288\u2013301.","DOI":"10.1007\/11744078_23"},{"issue":"8","key":"10.1016\/j.neucom.2020.10.046_b0020","doi-asserted-by":"crossref","first-page":"1930","DOI":"10.1109\/TMM.2013.2269899","article-title":"Content-based photo quality assessment","volume":"15","author":"Tang","year":"2013","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2020.10.046_b0025","doi-asserted-by":"crossref","first-page":"582","DOI":"10.1016\/j.patcog.2017.12.002","article-title":"Deep convolutional neural networks for mental load classification based on EEG data","volume":"76","author":"Jiao","year":"2018","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.neucom.2020.10.046_b0030","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neucom.2020.10.046_b0035","article-title":"P-cnn: Part-based convolutional neural networks for fine-grained visual categorization","author":"Han","year":"2019","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"10.1016\/j.neucom.2020.10.046_b0040","doi-asserted-by":"crossref","first-page":"2021","DOI":"10.1109\/TMM.2015.2477040","article-title":"Rating image aesthetics using deep learning","volume":"17","author":"Lu","year":"2015","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2020.10.046_b0045","doi-asserted-by":"crossref","first-page":"2815","DOI":"10.1109\/TMM.2019.2911428","article-title":"A gated peripheral-foveal convolutional neural network for unified image aesthetic prediction","volume":"21","author":"Zhang","year":"2019","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2020.10.046_b0050","doi-asserted-by":"crossref","unstructured":"V. Hosu, B. Goldlucke, D. Saupe, Effective aesthetics prediction with multi-level spatially pooled features, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 9375\u20139383.","DOI":"10.1109\/CVPR.2019.00960"},{"key":"10.1016\/j.neucom.2020.10.046_b0055","series-title":"Proceedings of the ACM Conference on Multimedia Conference","first-page":"262","article-title":"Joint image and text representation for aesthetics analysis","author":"Zhou","year":"2016"},{"key":"10.1016\/j.neucom.2020.10.046_b0060","doi-asserted-by":"crossref","unstructured":"Y.-L. Hii, J. See, M. Kairanbay, L.-K. Wong, Multigap: Multi-pooled inception network with text augmentation for aesthetic prediction of photographs, in: Proceedings of IEEE International Conference on Image Processing, 2017, pp. 1722\u20131726.","DOI":"10.1109\/ICIP.2017.8296576"},{"key":"10.1016\/j.neucom.2020.10.046_b0065","unstructured":"H. Zhang, I. Goodfellow, D. Metaxas, A. Odena, Self-attention generative adversarial networks, arXiv preprint arXiv:1805.08318 (2018)."},{"key":"10.1016\/j.neucom.2020.10.046_b0070","doi-asserted-by":"crossref","unstructured":"Y. Ke, X. Tang, F. Jing, The design of high-level features for photo quality assessment, in: Proceedings of IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Vol. 1, 2006, pp. 419\u2013426.","DOI":"10.1109\/CVPR.2006.303"},{"issue":"3","key":"10.1016\/j.neucom.2020.10.046_b0075","doi-asserted-by":"crossref","first-page":"1482","DOI":"10.1109\/TIP.2017.2651399","article-title":"Deep aesthetic quality assessment with semantic information","volume":"26","author":"Kao","year":"2017","journal-title":"IEEE Trans. Image Process."},{"issue":"10","key":"10.1016\/j.neucom.2020.10.046_b0080","doi-asserted-by":"crossref","first-page":"5100","DOI":"10.1109\/TIP.2018.2845100","article-title":"Leveraging expert feature knowledge for predicting image aesthetics","volume":"27","author":"Kucer","year":"2018","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2020.10.046_b0085","doi-asserted-by":"crossref","unstructured":"X. Lu, Z. Lin, X. Shen, R. Mech, J.Z. Wang, Deep multi-patch aggregation network for image style, aesthetics, and quality estimation, in: Proceedings of IEEE International Conference on Computer Vision, 2015, pp. 990\u2013998.","DOI":"10.1109\/ICCV.2015.119"},{"key":"10.1016\/j.neucom.2020.10.046_b0090","doi-asserted-by":"crossref","unstructured":"S. Ma, J. Liu, C.W. Chen, A-lamp: Adaptive layout-aware multi-patch deep convolutional neural network for photo aesthetic assessment, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 722\u2013731.","DOI":"10.1109\/CVPR.2017.84"},{"key":"10.1016\/j.neucom.2020.10.046_b0095","doi-asserted-by":"crossref","unstructured":"L. Mai, H. Jin, F. Liu, Composition-preserving deep photo aesthetics assessment, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 497\u2013506.","DOI":"10.1109\/CVPR.2016.60"},{"key":"10.1016\/j.neucom.2020.10.046_b0100","doi-asserted-by":"crossref","unstructured":"Y. Luo, X. Tang, Photo and video quality evaluation: Focusing on the subject, in: Proceedings of European Conference on Computer Vision, 2008, pp. 386\u2013399.","DOI":"10.1007\/978-3-540-88690-7_29"},{"key":"10.1016\/j.neucom.2020.10.046_b0105","doi-asserted-by":"crossref","unstructured":"S. Kong, X. Shen, Z.L. Lin, R. Mech, C.C. Fowlkes, Photo aesthetics ranking network with attributes and content adaptation, in: Proceedings of 14th European Conference on Computer Vision, 2016, pp. 662\u2013679.","DOI":"10.1007\/978-3-319-46448-0_40"},{"key":"10.1016\/j.neucom.2020.10.046_b0110","series-title":"Proceedings of International Conference on Image Processing","first-page":"2291","article-title":"Image aesthetic predictors based on weighted cnns","author":"Jin","year":"2016"},{"issue":"11","key":"10.1016\/j.neucom.2020.10.046_b0115","doi-asserted-by":"crossref","first-page":"3092","DOI":"10.1109\/TCYB.2017.2758350","article-title":"Engineering deep representations for modeling aesthetic perception","volume":"48","author":"Chen","year":"2018","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.neucom.2020.10.046_b0120","unstructured":"N. Murray, A. Gordo, A deep architecture for unified aesthetic prediction, arXiv preprint arXiv:1708.04890 (2017)."},{"issue":"8","key":"10.1016\/j.neucom.2020.10.046_b0125","doi-asserted-by":"crossref","first-page":"3998","DOI":"10.1109\/TIP.2018.2831899","article-title":"NIMA: neural image assessment","volume":"27","author":"Talebi","year":"2018","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2020.10.046_b0130","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"77","article-title":"Predicting aesthetic score distribution through cumulative jensen-shannon divergence","author":"Jin","year":"2018"},{"key":"10.1016\/j.neucom.2020.10.046_b0135","article-title":"Beyond vision: A multimodal recurrent attention convolutional neural network for unified image aesthetic prediction tasks","author":"Zhang","year":"2020","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.neucom.2020.10.046_b0140","series-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing","article-title":"Convolutional neural networks for sentence classification","author":"Kim","year":"2014"},{"key":"10.1016\/j.neucom.2020.10.046_b0145","doi-asserted-by":"crossref","unstructured":"S. Lai, L. Xu, K. Liu, J. Zhao, Recurrent convolutional neural networks for text classification, in: Twenty-ninth AAAI Conference on Artificial Intelligence, 2015, pp. 2267\u20132273.","DOI":"10.1609\/aaai.v29i1.9513"},{"issue":"12","key":"10.1016\/j.neucom.2020.10.046_b0150","doi-asserted-by":"crossref","first-page":"5947","DOI":"10.1109\/TNNLS.2018.2817340","article-title":"Beyond bilinear: Generalized multimodal factorized high-order pooling for visual question answering","volume":"29","author":"Yu","year":"2018","journal-title":"IEEE Trans. Neural Netw. Learning Syst."},{"key":"10.1016\/j.neucom.2020.10.046_b0155","unstructured":"J. Lu, J. Yang, D. Batra, D. Parikh, Hierarchical question-image co-attention for visual question answering, in: Proceedings of Advances In Neural Information Processing Systems, 2016, pp. 289\u2013297."},{"key":"10.1016\/j.neucom.2020.10.046_b0160","doi-asserted-by":"crossref","unstructured":"Z. Yu, J. Yu, Y. Cui, D. Tao, Q. Tian, Deep modular co-attention networks for visual question answering, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 6281\u20136290.","DOI":"10.1109\/CVPR.2019.00644"},{"key":"10.1016\/j.neucom.2020.10.046_b0165","series-title":"Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing","first-page":"2249","article-title":"A decomposable attention model for natural language inference","author":"Parikh","year":"2016"},{"key":"10.1016\/j.neucom.2020.10.046_b0170","doi-asserted-by":"crossref","unstructured":"H. Hu, J. Gu, Z. Zhang, J. Dai, Y. Wei, Relation networks for object detection, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 3588\u20133597.","DOI":"10.1109\/CVPR.2018.00378"},{"key":"10.1016\/j.neucom.2020.10.046_b0175","doi-asserted-by":"crossref","unstructured":"Z. Yang, X. He, J. Gao, L. Deng, A.J. Smola, Stacked attention networks for image question answering, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 21\u201329.","DOI":"10.1109\/CVPR.2016.10"},{"key":"10.1016\/j.neucom.2020.10.046_b0180","doi-asserted-by":"crossref","unstructured":"L. Ye, M. Rochan, Z. Liu, Y. Wang, Cross-modal self-attention network for referring image segmentation, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 10502\u201310511.","DOI":"10.1109\/CVPR.2019.01075"},{"key":"10.1016\/j.neucom.2020.10.046_b0185","series-title":"Proceedings of the 3rd International Conference on Learning Representations","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2015"},{"key":"10.1016\/j.neucom.2020.10.046_b0190","series-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing","first-page":"1532","article-title":"Glove: Global vectors for word representation","author":"Pennington","year":"2014"},{"key":"10.1016\/j.neucom.2020.10.046_b0195","series-title":"Proceedings of the 26th Annual Conference on Neural Information Processing Systems","first-page":"1106","article-title":"Imagenet classification with deep convolutional neural networks","author":"Krizhevsky","year":"2012"},{"key":"10.1016\/j.neucom.2020.10.046_b0200","doi-asserted-by":"crossref","unstructured":"C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, Z. Wojna, Rethinking the inception architecture for computer vision, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 2818\u20132826.","DOI":"10.1109\/CVPR.2016.308"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220316076?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220316076?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,8,16]],"date-time":"2024-08-16T10:55:17Z","timestamp":1723805717000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231220316076"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3]]},"references-count":40,"alternative-id":["S0925231220316076"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2020.10.046","relation":{},"ISSN":["0925-2312"],"issn-type":[{"type":"print","value":"0925-2312"}],"subject":[],"published":{"date-parts":[[2021,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"MSCAN: Multimodal Self-and-Collaborative Attention Network for image aesthetic prediction tasks","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2020.10.046","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2020 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}