{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T14:37:09Z","timestamp":1730212629068,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvpr52729.2023.01363","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T17:30:52Z","timestamp":1692725452000},"page":"14183-14192","source":"Crossref","is-referenced-by-count":10,"title":["N\u00dcWA-LIP: Language-guided Image Inpainting with Defect-free VQGAN"],"prefix":"10.1109","author":[{"given":"Minheng","family":"Ni","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology"}]},{"given":"Xiaoming","family":"Li","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology"}]},{"given":"Wangmeng","family":"Zuo","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology"}]}],"member":"263","reference":[{"journal-title":"Paint by word","year":"2021","author":"Bau","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/344779.344972"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01103"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"journal-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref5"},{"journal-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref6"},{"key":"ref7","article-title":"Imagebart: Bidirectional context with multinomial diffusion for autoregressive image synthesis","volume":"34","author":"Esser","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref10","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","volume":"30","author":"Heusel","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"Adam: A method for stochastic optimization","year":"2014","author":"Kingma","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1405.0312"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_6"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"journal-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models","year":"2021","author":"Nichol","key":"ref16"},{"journal-title":"Neural discrete representation learning","year":"2017","author":"van den Oord","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01063"},{"journal-title":"Zero-shot text-to-image generation","year":"2021","author":"Ramesh","key":"ref19"},{"key":"ref20","first-page":"14866","article-title":"Generating diverse high-fidelity images with vq-vae-2","volume-title":"Advances in neural information processing systems","author":"Razavi","year":"2019"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"journal-title":"Mass: Masked sequence to sequence pre-training for language generation","year":"2019","author":"Song","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_41"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475506"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"journal-title":"Scaling autoregressive models for content-rich text-to-image generation","year":"2022","author":"Yu","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414017"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00578"},{"journal-title":"Large scale image completion via co-modulated generative adversarial networks","year":"2021","author":"Zhao","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00153"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2023,6,17]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10203037\/10203050\/10205299.pdf?arnumber=10205299","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T17:29:59Z","timestamp":1709314199000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10205299\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/cvpr52729.2023.01363","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}