{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T04:16:01Z","timestamp":1727064961448},"reference-count":63,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1109\/iccv.2019.01041","type":"proceedings-article","created":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T10:27:52Z","timestamp":1582885672000},"source":"Crossref","is-referenced-by-count":207,"title":["Relation-Aware Graph Attention Network for Visual Question Answering"],"prefix":"10.1109","author":[{"given":"Linjie","family":"Li","sequence":"first","affiliation":[]},{"given":"Zhe","family":"Gan","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Jingjing","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Learning conditioned graph structures for interpretable visual question answering","author":"norcliffe-brown","year":"2018","journal-title":"NeurIPS"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.232"},{"key":"ref33","article-title":"Hierarchical question-image co-attention for visual question answering","author":"lu","year":"2016","journal-title":"NIPS"},{"key":"ref32","article-title":"Visual relationship detection with language priors","author":"lu","year":"2016","journal-title":"ECCV"},{"key":"ref31","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"ECCV"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1164"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.119"},{"key":"ref36","article-title":"Learning visual question answering by bootstrapping hard attention","author":"malinowski","year":"2018","journal-title":"ECCV"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00729"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220036"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2817340"},{"key":"ref62","article-title":"Learning to count objects in natural images for visual question answering","author":"zhang","year":"2018","journal-title":"ICLRE"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.331"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.145"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref27","article-title":"Visual genome: Connecting language and vision using crowdsourced dense image annotations","author":"krishna","year":"2016","journal-title":"arXiv preprint arXiv 1602 07332"},{"key":"ref29","article-title":"Incorporating external knowledge to answer open-domain visual questions with dynamic memory networks","author":"li","year":"2018","journal-title":"CVPR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00522"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref22","article-title":"Pythia v0. 1: the winning entry to the vqa challenge 2018","author":"jiang","year":"2018","journal-title":"arXiv preprint arXiv 1807 09956"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00378"},{"key":"ref24","article-title":"Bilinear attention networks","author":"kim","year":"2018","journal-title":"NeurIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"ref26","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"2017","journal-title":"ICLRE"},{"key":"ref25","article-title":"Hadamard product for low-rank bilinear pooling","author":"kim","year":"2017","journal-title":"ICLRE"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.344"},{"key":"ref51","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.446"},{"key":"ref58","article-title":"Exploring visual relationship for image captioning","author":"yao","year":"2018","journal-title":"ECCV"},{"key":"ref57","article-title":"Multi-modal learning with prior visual relation reasoning","author":"yang","year":"2018","journal-title":"arXiv preprint arXiv 1812 02588"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.500"},{"key":"ref54","article-title":"Image captioning and visual question answering based on attributes and external knowledge","author":"wu","year":"2017","journal-title":"PAMI"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.416"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00206"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206532"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00118"},{"key":"ref40","author":"paszke","year":"2017","journal-title":"On Automatic Differentiation"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"ref13","article-title":"Every picture tells a story: Generating sentences from images","author":"farhadi","year":"2010","journal-title":"ECCV"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587799"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-008-0140-x"},{"key":"ref18","article-title":"Accurate, large minibatch sgd: training imagenet in 1 hour","author":"goyal","year":"2017","journal-title":"arXiv preprint arXiv 1706 02677"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.285"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00209"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0285(82)90007-X"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.352"},{"key":"ref7","article-title":"A tree-based context model for object recognition","author":"choi","year":"2012","journal-title":"PAMI"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00444"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.412"},{"key":"ref46","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017","journal-title":"NIPS"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995711"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00807"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-2812"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00801"},{"key":"ref44","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"NIPS"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298713"}],"event":{"name":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Seoul, Korea (South)","start":{"date-parts":[[2019,10,27]]},"end":{"date-parts":[[2019,11,2]]}},"container-title":["2019 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8972782\/9008105\/09010056.pdf?arnumber=9010056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:55:42Z","timestamp":1658094942000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9010056\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10]]},"references-count":63,"URL":"https:\/\/doi.org\/10.1109\/iccv.2019.01041","relation":{},"subject":[],"published":{"date-parts":[[2019,10]]}}}