{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T16:40:12Z","timestamp":1714149612272},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1016\/j.patcog.2024.110438","type":"journal-article","created":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T16:02:44Z","timestamp":1710604964000},"page":"110438","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive multi-text union for stable text-to-image synthesis learning"],"prefix":"10.1016","volume":"152","author":[{"given":"Yan","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Jiechang","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Huaidong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xuemiao","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Huajie","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Fanzhi","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Yuexia","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2024.110438_b1","doi-asserted-by":"crossref","first-page":"1275","DOI":"10.1109\/TIP.2020.3026728","article-title":"KT-GAN: Knowledge-transfer generative adversarial network for text-to-image synthesis","volume":"30","author":"Tan","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110438_b2","doi-asserted-by":"crossref","unstructured":"Y. Li, H. Liu, Q. Wu, F. Mu, J. Yang, J. Gao, C. Li, Y.J. Lee, Gligen: Open-set grounded text-to-image generation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 22511\u201322521.","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"10.1016\/j.patcog.2024.110438_b3","doi-asserted-by":"crossref","unstructured":"H. Zhang, T. Xu, H. Li, S. Zhang, X. Wang, X. Huang, D.N. Metaxas, Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks, in: Proc. IEEE Conf. Int. Conf. Comput. Vis., 2017, pp. 5907\u20135915.","DOI":"10.1109\/ICCV.2017.629"},{"issue":"8","key":"10.1016\/j.patcog.2024.110438_b4","doi-asserted-by":"crossref","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","article-title":"Stackgan++: Realistic image synthesis with stacked generative adversarial networks","volume":"41","author":"Zhang","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.110438_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2020.102955","article-title":"Text to photo-realistic image synthesis via chained deep recurrent generative adversarial network","volume":"74","author":"Wang","year":"2021","journal-title":"J. Vis. Commun. Image Represent."},{"key":"10.1016\/j.patcog.2024.110438_b6","doi-asserted-by":"crossref","unstructured":"S. Hong, D. Yang, J. Choi, H. Lee, Inferring semantic layout for hierarchical text-to-image synthesis, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2018, pp. 7986\u20137994.","DOI":"10.1109\/CVPR.2018.00833"},{"key":"10.1016\/j.patcog.2024.110438_b7","series-title":"Proc. IEEE Conf. Eur. Conf. Comput. Vis.","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.patcog.2024.110438_b8","article-title":"Attention is all you need","volume":"vol. 30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.patcog.2024.110438_b9","doi-asserted-by":"crossref","unstructured":"T. Xu, P. Zhang, Q. Huang, H. Zhang, Z. Gan, X. Huang, X. He, Attngan: Fine-grained text to image generation with attentional generative adversarial networks, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2018, pp. 1316\u20131324.","DOI":"10.1109\/CVPR.2018.00143"},{"key":"10.1016\/j.patcog.2024.110438_b10","doi-asserted-by":"crossref","unstructured":"T. Qiao, J. Zhang, D. Xu, D. Tao, Mirrorgan: Learning text-to-image generation by redescription, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2019, pp. 1505\u20131514.","DOI":"10.1109\/CVPR.2019.00160"},{"key":"10.1016\/j.patcog.2024.110438_b11","doi-asserted-by":"crossref","unstructured":"S. Ruan, Y. Zhang, K. Zhang, Y. Fan, F. Tang, Q. Liu, E. Chen, Dae-gan: Dynamic aspect-aware gan for text-to-image synthesis, in: Proc. IEEE Conf. Int. Conf. Comput. Vis., 2021, pp. 13960\u201313969.","DOI":"10.1109\/ICCV48922.2021.01370"},{"key":"10.1016\/j.patcog.2024.110438_b12","doi-asserted-by":"crossref","unstructured":"H. Zhang, J.Y. Koh, J. Baldridge, H. Lee, Y. Yang, Cross-modal contrastive learning for text-to-image generation, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2021, pp. 833\u2013842.","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"10.1016\/j.patcog.2024.110438_b13","series-title":"The Caltech-Ucsd Birds-200\u20132011 Dataset","author":"Wah","year":"2011"},{"key":"10.1016\/j.patcog.2024.110438_b14","doi-asserted-by":"crossref","unstructured":"Z. Zhang, Y. Xie, L. Yang, Photographic text-to-image synthesis with a hierarchically-nested adversarial network, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2018, pp. 6199\u20136208.","DOI":"10.1109\/CVPR.2018.00649"},{"key":"10.1016\/j.patcog.2024.110438_b15","doi-asserted-by":"crossref","unstructured":"M. Kang, J.-Y. Zhu, R. Zhang, J. Park, E. Shechtman, S. Paris, T. Park, Scaling up gans for text-to-image synthesis, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 10124\u201310134.","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"10.1016\/j.patcog.2024.110438_b16","article-title":"Blip-diffusion: Pre-trained subject representation for controllable text-to-image generation and editing","volume":"36","author":"Li","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2024.110438_b17","series-title":"International Conference on Machine Learning","first-page":"1060","article-title":"Generative adversarial text to image synthesis","author":"Reed","year":"2016"},{"key":"10.1016\/j.patcog.2024.110438_b18","article-title":"Learning what and where to draw","volume":"vol. 29","author":"Reed","year":"2016"},{"key":"10.1016\/j.patcog.2024.110438_b19","doi-asserted-by":"crossref","unstructured":"A. Nguyen, J. Clune, Y. Bengio, A. Dosovitskiy, J. Yosinski, Plug and play generative networks: Conditional iterative generation of images in latent space, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2017, pp. 4467\u20134477.","DOI":"10.1109\/CVPR.2017.374"},{"key":"10.1016\/j.patcog.2024.110438_b20","series-title":"Tac-gan-text conditioned auxiliary classifier generative adversarial network","author":"Dash","year":"2017"},{"key":"10.1016\/j.patcog.2024.110438_b21","doi-asserted-by":"crossref","unstructured":"G. Yin, B. Liu, L. Sheng, N. Yu, X. Wang, J. Shao, Semantics disentangling for text-to-image generation, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2019, pp. 2327\u20132336.","DOI":"10.1109\/CVPR.2019.00243"},{"key":"10.1016\/j.patcog.2024.110438_b22","doi-asserted-by":"crossref","unstructured":"M. Zhu, P. Pan, W. Chen, Y. Yang, Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2019, pp. 5802\u20135810.","DOI":"10.1109\/CVPR.2019.00595"},{"key":"10.1016\/j.patcog.2024.110438_b23","doi-asserted-by":"crossref","unstructured":"W. Li, P. Zhang, L. Zhang, Q. Huang, X. He, S. Lyu, J. Gao, Object-driven text-to-image synthesis via adversarial training, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2019, pp. 12174\u201312182.","DOI":"10.1109\/CVPR.2019.01245"},{"key":"10.1016\/j.patcog.2024.110438_b24","doi-asserted-by":"crossref","DOI":"10.1016\/j.dsp.2020.102866","article-title":"Cross-modal feature alignment based hybrid attentional generative adversarial networks for text-to-image synthesis","volume":"107","author":"Cheng","year":"2020","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.patcog.2024.110438_b25","article-title":"Deep multiview union learning network for multisource image classification","author":"Liu","year":"2020","journal-title":"IEEE Trans. Cybern."},{"issue":"12","key":"10.1016\/j.patcog.2024.110438_b26","doi-asserted-by":"crossref","first-page":"10062","DOI":"10.1109\/TGRS.2020.3047130","article-title":"Multisource remote sensing data classification with graph fusion network","volume":"59","author":"Du","year":"2021","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.patcog.2024.110438_b27","doi-asserted-by":"crossref","first-page":"2798","DOI":"10.1109\/TIP.2021.3055062","article-title":"Multi-sentence auxiliary adversarial networks for fine-grained text-to-image synthesis","volume":"30","author":"Yang","year":"2021","journal-title":"IEEE Trans. Image Process."},{"issue":"11","key":"10.1016\/j.patcog.2024.110438_b28","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","article-title":"Bidirectional recurrent neural networks","volume":"45","author":"Schuster","year":"1997","journal-title":"IEEE Trans. Signal Process."},{"key":"10.1016\/j.patcog.2024.110438_b29","doi-asserted-by":"crossref","unstructured":"J. Wang, W. Jiang, L. Ma, W. Liu, Y. Xu, Bidirectional attentive fusion with context gating for dense video captioning, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2018, pp. 7190\u20137198.","DOI":"10.1109\/CVPR.2018.00751"},{"key":"10.1016\/j.patcog.2024.110438_b30","doi-asserted-by":"crossref","unstructured":"J. Cheng, F. Wu, Y. Tian, L. Wang, D. Tao, RiFeGAN: Rich feature generation for text-to-image synthesis from prior knowledge, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2020, pp. 10911\u201310920.","DOI":"10.1109\/CVPR42600.2020.01092"},{"key":"10.1016\/j.patcog.2024.110438_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107573","article-title":"Unsupervised text-to-image synthesis","volume":"110","author":"Dong","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110438_b32","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/j.patrec.2022.04.010","article-title":"Text-to-image synthesis with self-supervised learning","volume":"157","author":"Tan","year":"2022","journal-title":"Pattern Recognit. Lett."},{"key":"10.1016\/j.patcog.2024.110438_b33","series-title":"Proc. IEEE Conf. Eur. Conf. Comput. Vis.","first-page":"491","article-title":"Cpgan: Content-parsing generative adversarial networks for text-to-image synthesis","author":"Liang","year":"2020"},{"key":"10.1016\/j.patcog.2024.110438_b34","doi-asserted-by":"crossref","unstructured":"H. Wang, G. Lin, S.C. Hoi, C. Miao, Cycle-consistent inverse gan for text-to-image synthesis, in: Proceedings of the 29th ACM International Conference on Multimedia, 2021, pp. 630\u2013638.","DOI":"10.1145\/3474085.3475226"},{"key":"10.1016\/j.patcog.2024.110438_b35","doi-asserted-by":"crossref","unstructured":"Y. Qiao, Q. Chen, C. Deng, N. Ding, Y. Qi, M. Tan, X. Ren, Q. Wu, R-GAN: Exploring Human-like Way for Reasonable Text-to-Image Synthesis via Generative Adversarial Networks, in: Proceedings of the 29th ACM International Conference on Multimedia, 2021, pp. 2085\u20132093.","DOI":"10.1145\/3474085.3475363"},{"key":"10.1016\/j.patcog.2024.110438_b36","series-title":"Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis","author":"Tao","year":"2020"},{"key":"10.1016\/j.patcog.2024.110438_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107384","article-title":"Lightweight dynamic conditional GAN with pyramid attention for text-to-image synthesis","volume":"110","author":"Gao","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110438_b38","article-title":"DR-GAN: Distribution regularization for text-to-image generation","author":"Tan","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2024.110438_b39","doi-asserted-by":"crossref","unstructured":"S. Gu, D. Chen, J. Bao, F. Wen, B. Zhang, D. Chen, L. Yuan, B. Guo, Vector quantized diffusion model for text-to-image synthesis, in: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2022, pp. 10696\u201310706.","DOI":"10.1109\/CVPR52688.2022.01043"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324001894?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324001894?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T16:10:07Z","timestamp":1714147807000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320324001894"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":39,"alternative-id":["S0031320324001894"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2024.110438","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2024,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Adaptive multi-text union for stable text-to-image synthesis learning","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2024.110438","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"110438"}}