{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T02:11:57Z","timestamp":1743127917583,"version":"3.40.3"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030012489"},{"type":"electronic","value":"9783030012496"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01249-6_32","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T15:35:46Z","timestamp":1538753746000},"page":"527-543","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":59,"title":["\u201cFactual\u201d or \u201cEmotional\u201d: Stylized Image Captioning with Adaptive Learning and Attention"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6355-6474","authenticated-orcid":false,"given":"Tianlang","family":"Chen","sequence":"first","affiliation":[]},{"given":"Zhongping","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Quanzeng","family":"You","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Zhaowen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Hailin","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Jiebo","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"32_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and VQA. arXiv preprint arXiv:1707.07998 (2017)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"32_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"32_CR3","unstructured":"Chen, X., et al.: Microsoft coco captions: data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Chen, X., Lawrence Zitnick, C.: Mind\u2019s eye: a recurrent visual representation for image caption generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2422\u20132431 (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"32_CR6","unstructured":"Elliott, D., Keller, F.: Image description using visual dependency representations. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1292\u20131302 (2013)"},{"key":"32_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H., et al.: From captions to visual concepts and back (2015)","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"32_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-15561-1_2","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Farhadi","year":"2010","unstructured":"Farhadi, A., et al.: Every picture tells a story: generating sentences from images. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6314, pp. 15\u201329. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_2"},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Gan, C., Gan, Z., He, X., Gao, J., Deng, L.: Stylenet: generating attractive visual captions with styles. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.108"},{"key":"32_CR10","doi-asserted-by":"crossref","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: Image style transfer using convolutional neural networks. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2414\u20132423. IEEE (2016)","DOI":"10.1109\/CVPR.2016.265"},{"key":"32_CR11","doi-asserted-by":"crossref","unstructured":"Gurari, D., et al.: Vizwiz grand challenge: answering visual questions from blind people. arXiv preprint arXiv:1802.08218 (2018)","DOI":"10.1109\/CVPR.2018.00380"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"32_CR13","unstructured":"Hermann, K.M., et al.: Teaching machines to read and comprehend. In: Advances in Neural Information Processing Systems, pp. 1693\u20131701 (2015)"},{"key":"32_CR14","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1613\/jair.3994","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh, M., Young, P., Hockenmaier, J.: Framing image description as a ranking task: data, models and evaluation metrics. J. Artif. Intell. Res. 47, 853\u2013899 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"32_CR15","unstructured":"Hu, Z., Yang, Z., Liang, X., Salakhutdinov, R., Xing, E.P.: Toward controlled generation of text. In: International Conference on Machine Learning, pp. 1587\u20131596 (2017)"},{"key":"32_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3128\u20133137 (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Kulkarni, G., et al.: Baby talk: understanding and generating image descriptions. In: Proceedings of the 24th CVPR. Citeseer (2011)","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"32_CR19","unstructured":"Kuznetsova, P., Ordonez, V., Berg, A.C., Berg, T.L., Choi, Y.: Collective generation of natural image descriptions. In: Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Long Papers, vol. 1, pp. 359\u2013368. Association for Computational Linguistics (2012)"},{"issue":"1","key":"32_CR20","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","volume":"2","author":"P Kuznetsova","year":"2014","unstructured":"Kuznetsova, P., Ordonez, V., Berg, T., Choi, Y.: Treetalk: composition and compression of trees for image descriptions. Trans. Assoc. Comput. Linguist. 2(1), 351\u2013362 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"32_CR21","unstructured":"Lebret, R., Pinheiro, P.O., Collobert, R.: Simple image description generator via a linear phrase-based approach. arXiv preprint arXiv:1412.8419 (2014)"},{"key":"32_CR22","unstructured":"Li, S., Kulkarni, G., Berg, T.L., Berg, A.C., Choi, Y.: Composing simple image descriptions using web-scale n-grams. In: Proceedings of the Fifteenth Conference on Computational Natural Language Learning, pp. 220\u2013228. Association for Computational Linguistics (2011)"},{"key":"32_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Yao, T., Mei, T., Chao, H., Rui, Y.: Share-and-chat: achieving human-level video commenting by search and multi-view embedding. In: Proceedings of the 2016 ACM on Multimedia Conference, pp. 928\u2013937. ACM (2016)","DOI":"10.1145\/2964284.2964320"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), vol. 6 (2017)","DOI":"10.1109\/CVPR.2017.345"},{"key":"32_CR25","unstructured":"Luong, M.T., Le, Q.V., Sutskever, I., Vinyals, O., Kaiser, L.: Multi-task sequence to sequence learning. arXiv preprint arXiv:1511.06114 (2015)"},{"key":"32_CR26","doi-asserted-by":"crossref","unstructured":"Mao, J., Wei, X., Yang, Y., Wang, J., Huang, Z., Yuille, A.L.: Learning like a child: fast novel visual concept learning from sentence descriptions of images. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2533\u20132541 (2015)","DOI":"10.1109\/ICCV.2015.291"},{"key":"32_CR27","unstructured":"Mao, J., Xu, W., Yang, Y., Wang, J., Huang, Z., Yuille, A.: Deep captioning with multimodal recurrent neural networks (M-RNN). arXiv preprint arXiv:1412.6632 (2014)"},{"key":"32_CR28","doi-asserted-by":"crossref","unstructured":"Mathews, A.P., Xie, L., He, X.: SentiCap: generating image descriptions with sentiments. In: AAAI, pp. 3574\u20133580 (2016)","DOI":"10.1609\/aaai.v30i1.10475"},{"key":"32_CR29","unstructured":"Mnih, V., Heess, N., Graves, A., et al.: Recurrent models of visual attention. In: Advances in Neural Information Processing Systems, pp. 2204\u20132212 (2014)"},{"key":"32_CR30","unstructured":"Neumann, L., Neumann, A.: Color style transfer techniques using hue, lightness and saturation histogram matching. In: Computational Aesthetics, pp. 111\u2013122. Citeseer (2005)"},{"key":"32_CR31","unstructured":"Ordonez, V., Kulkarni, G., Berg, T.L.: Im2Text: describing images using 1 million captioned photographs. In: Advances in Neural Information Processing Systems, pp. 1143\u20131151 (2011)"},{"key":"32_CR32","unstructured":"Rockt\u00e4schel, T., Grefenstette, E., Hermann, K.M., Ko\u010disk\u1ef3, T., Blunsom, P.: Reasoning about entailment with neural attention. arXiv preprint arXiv:1509.06664 (2015)"},{"key":"32_CR33","doi-asserted-by":"crossref","unstructured":"Rush, A.M., Chopra, S., Weston, J.: A neural attention model for abstractive sentence summarization. arXiv preprint arXiv:1509.00685 (2015)","DOI":"10.18653\/v1\/D15-1044"},{"key":"32_CR34","unstructured":"Schweikert, G., R\u00e4tsch, G., Widmer, C., Sch\u00f6lkopf, B.: An empirical analysis of domain adaptation algorithms for genomic sequence analysis. In: Advances in Neural Information Processing Systems, pp. 1433\u20131440 (2009)"},{"key":"32_CR35","unstructured":"Shen, T., Lei, T., Barzilay, R., Jaakkola, T.: Style transfer from non-parallel text by cross-alignment. In: Advances in Neural Information Processing Systems, pp. 6833\u20136844 (2017)"},{"key":"32_CR36","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"issue":"2","key":"32_CR37","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1162\/089892904322984526","volume":"16","author":"MW Spratling","year":"2004","unstructured":"Spratling, M.W., Johnson, M.H.: A feedback model of visual attention. J. Cogn. Neurosci. 16(2), 219\u2013237 (2004)","journal-title":"J. Cogn. Neurosci."},{"key":"32_CR38","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems, pp. 3104\u20133112 (2014)"},{"key":"32_CR39","unstructured":"Tang, Y., Srivastava, N., Salakhutdinov, R.R.: Learning generative models with visual attention. In: Advances in Neural Information Processing Systems, pp. 1808\u20131816 (2014)"},{"key":"32_CR40","doi-asserted-by":"crossref","unstructured":"Tran, K., He, X., Zhang, L., Sun, J.: Rich image captioning in the wild. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 434\u2013441. IEEE (2016)","DOI":"10.1109\/CVPRW.2016.61"},{"key":"32_CR41","unstructured":"Ulyanov, D., Lebedev, V., Vedaldi, A., Lempitsky, V.S.: Texture networks: feed-forward synthesis of textures and stylized images. In: ICML, pp. 1349\u20131357 (2016)"},{"key":"32_CR42","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3156\u20133164. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"32_CR43","unstructured":"Wu, Z.Y.Y.Y.Y., Cohen, R.S.W.W.: Encode, review, and decode: Reviewer module for caption generation. arXiv preprint arXiv:1605.07912 (2016)"},{"key":"32_CR44","unstructured":"Xu, K., et al.: Show, attend and tell: neural image caption generation with visual attention. In: International Conference on Machine Learning, pp. 2048\u20132057 (2015)"},{"key":"32_CR45","doi-asserted-by":"crossref","unstructured":"You, Q., Jin, H., Wang, Z., Fang, C., Luo, J.: Image captioning with semantic attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4651\u20134659 (2016)","DOI":"10.1109\/CVPR.2016.503"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01249-6_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T00:56:37Z","timestamp":1664931397000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01249-6_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012489","9783030012496"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01249-6_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}