{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T09:30:25Z","timestamp":1725615025638},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,7,18]],"date-time":"2019-07-18T00:00:00Z","timestamp":1563408000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Zhejiang Natural Science Foundation","award":["LR19F020002"]},{"name":"Key R&D Program of Zhejiang Province","award":["2018C01006"]},{"name":"Joint Research Program of ZJU & Hikvision Research Institute"},{"name":"National Natural Science Foundation of China","award":["No.61602405, No.61836002, No.U1611461, No.61751209"]},{"name":"Chinese Knowledge Center for Engineering Sciences and Technology"},{"name":"Alibaba Innovative Research"},{"name":"The Fundamental Research Funds for the Central Universities"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,7,18]]},"DOI":"10.1145\/3331184.3331240","type":"proceedings-article","created":{"date-parts":[[2019,7,19]],"date-time":"2019-07-19T17:40:26Z","timestamp":1563558026000},"page":"465-474","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Video Dialog via Multi-Grained Convolutional Self-Attention Context Networks"],"prefix":"10.1145","author":[{"given":"Weike","family":"Jin","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Mao","family":"Gu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Jun","family":"Yu","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}]},{"given":"Jun","family":"Xiao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Yueting","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2019,7,18]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"volume-title":"Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics . 190--200","year":"2011","author":"Chen David L","key":"e_1_3_2_2_2_1"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.121"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.321"},{"volume-title":"Language modeling with gated convolutional networks. arXiv preprint arXiv:1612.08083","year":"2016","author":"Dauphin Yann N","key":"e_1_3_2_2_6_1"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.475"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00688"},{"volume-title":"Long short-term memory. Neural computation","year":"1997","author":"Hochreiter Sepp","key":"e_1_3_2_2_9_1"},{"volume-title":"Abhishek Das, et almbox.","year":"2018","author":"Hori Chiori","key":"e_1_3_2_2_10_1"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00603"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.149"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_2_14_1","unstructured":"Jin-Hwa Kim Sang-Woo Lee Donghyun Kwak Min-Oh Heo Jeonghee Kim Jung-Woo Ha and Byoung-Tak Zhang. 2016. Multimodal residual learning for visual qa. In Advances in Neural Information Processing Systems. 361--369. Jin-Hwa Kim Sang-Woo Lee Donghyun Kwak Min-Oh Heo Jeonghee Kim Jung-Woo Ha and Byoung-Tak Zhang. 2016. Multimodal residual learning for visual qa. In Advances in Neural Information Processing Systems. 361--369."},{"key":"e_1_3_2_2_15_1","unstructured":"DP Kingma and J Ba. {n.d.}. DP Kingma and J. Ba Adam: A method for stochastic optimization arXiv: 1412.6980. Adam: A Method for Stochastic Optimization ( {n. d.}). DP Kingma and J Ba. {n.d.}. DP Kingma and J. Ba Adam: A method for stochastic optimization arXiv: 1412.6980. Adam: A Method for Stochastic Optimization ( {n. d.})."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_10"},{"key":"e_1_3_2_2_17_1","unstructured":"Sang-Woo Lee Yu-Jung Heo and Byoung-Tak Zhang. 2018. Answerer in Questioner's Mind: Information Theoretic Approach to Goal-Oriented Visual Dialog. In Advances in Neural Information Processing Systems. 2580--2590. Sang-Woo Lee Yu-Jung Heo and Byoung-Tak Zhang. 2018. Answerer in Questioner's Mind: Information Theoretic Approach to Goal-Oriented Visual Dialog. In Advances in Neural Information Processing Systems. 2580--2590."},{"volume-title":"Training rnns as fast as cnns. arXiv preprint arXiv:1709.02755","year":"2017","author":"Lei Tao","key":"e_1_3_2_2_18_1"},{"volume-title":"Focal Visual-Text Attention for Visual Question Answering. In IEEE Conference on Computer Vision and Pattern Recognition. 6135--6143","year":"2018","author":"Liang Junwei","key":"e_1_3_2_2_19_1"},{"volume-title":"Mo Yu, Bing Xiang, Bowen Zhou, and Yoshua Bengio.","year":"2017","author":"Lin Zhouhan","key":"e_1_3_2_2_20_1"},{"key":"e_1_3_2_2_21_1","unstructured":"Jiasen Lu Anitha Kannan Jianwei Yang Devi Parikh and Dhruv Batra. 2017. Best of both worlds: Transferring knowledge from discriminative learning to a generative visual dialog model. In Advances in Neural Information Processing Systems. 314--324. Jiasen Lu Anitha Kannan Jianwei Yang Devi Parikh and Dhruv Batra. 2017. Best of both worlds: Transferring knowledge from discriminative learning to a generative visual dialog model. In Advances in Neural Information Processing Systems. 314--324."},{"key":"e_1_3_2_2_22_1","unstructured":"Mateusz Malinowski and Mario Fritz. 2014. A multi-world approach to question answering about real-world scenes based on uncertain input. In Advances in neural information processing systems. 1682--1690. Mateusz Malinowski and Mario Fritz. 2014. A multi-world approach to question answering about real-world scenes based on uncertain input. In Advances in neural information processing systems. 1682--1690."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.9"},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition. 6097--6105","year":"2018","author":"Massiceti Daniela","key":"e_1_3_2_2_24_1"},{"volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","year":"2013","author":"Mikolov Tomas","key":"e_1_3_2_2_25_1"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.80"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00637"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.11"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11752-2_15"},{"key":"e_1_3_2_2_31_1","unstructured":"Paul Hongsuck Seo Andreas Lehrmann Bohyung Han and Leonid Sigal. 2017. Visual reference resolution using attention memory for visual dialog. In Advances in neural information processing systems. 3719--3729. Paul Hongsuck Seo Andreas Lehrmann Bohyung Han and Leonid Sigal. 2017. Visual reference resolution using attention memory for visual dialog. In Advances in neural information processing systems. 3719--3729."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","unstructured":"Iulian Vlad Serban Tim Klinger Gerald Tesauro Kartik Talamadupula Bowen Zhou Yoshua Bengio and Aaron C Courville. 2017a. Multiresolution Recurrent Neural Networks: An Application to Dialogue Response Generation.. In AAAI. 3288--3294. Iulian Vlad Serban Tim Klinger Gerald Tesauro Kartik Talamadupula Bowen Zhou Yoshua Bengio and Aaron C Courville. 2017a. Multiresolution Recurrent Neural Networks: An Application to Dialogue Response Generation.. In AAAI. 3288--3294.","DOI":"10.1609\/aaai.v31i1.10984"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"crossref","unstructured":"Iulian Vlad Serban Alessandro Sordoni Yoshua Bengio Aaron C Courville and Joelle Pineau. 2016. Building End-To-End Dialogue Systems Using Generative Hierarchical Neural Network Models.. In AAAI. 3776--3784. Iulian Vlad Serban Alessandro Sordoni Yoshua Bengio Aaron C Courville and Joelle Pineau. 2016. Building End-To-End Dialogue Systems Using Generative Hierarchical Neural Network Models.. In AAAI. 3776--3784.","DOI":"10.1609\/aaai.v30i1.9883"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"crossref","unstructured":"Iulian Vlad Serban Alessandro Sordoni Ryan Lowe Laurent Charlin Joelle Pineau Aaron C Courville and Yoshua Bengio. 2017b. A Hierarchical Latent Variable Encoder-Decoder Model for Generating Dialogues.. In AAAI . 3295--3301. Iulian Vlad Serban Alessandro Sordoni Ryan Lowe Laurent Charlin Joelle Pineau Aaron C Courville and Yoshua Bengio. 2017b. A Hierarchical Latent Variable Encoder-Decoder Model for Generating Dialogues.. In AAAI . 3295--3301.","DOI":"10.1609\/aaai.v31i1.10983"},{"volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","year":"2014","author":"Simonyan Karen","key":"e_1_3_2_2_35_1"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_2_37_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems. 5998--6008. Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems. 5998--6008."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_2_39_1","unstructured":"Jason E Weston. 2016. Dialog-based language learning. In Advances in Neural Information Processing Systems. 829--837. Jason E Weston. 2016. Dialog-based language learning. In Advances in Neural Information Processing Systems. 829--837."},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition","year":"2018","author":"Wu Qi","key":"e_1_3_2_2_40_1"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1046"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080655"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.446"},{"volume-title":"Juan Carlos Niebles, and Min Sun","year":"2017","author":"Zeng Kuo-Hao","key":"e_1_3_2_2_44_1"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.5555\/3304222.3304281"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172381"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-2117"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.540"}],"event":{"name":"SIGIR '19: The 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Paris France","acronym":"SIGIR '19"},"container-title":["Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3331184.3331240","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T21:00:49Z","timestamp":1673125249000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3331184.3331240"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7,18]]},"references-count":48,"alternative-id":["10.1145\/3331184.3331240","10.1145\/3331184"],"URL":"https:\/\/doi.org\/10.1145\/3331184.3331240","relation":{},"subject":[],"published":{"date-parts":[[2019,7,18]]},"assertion":[{"value":"2019-07-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}