{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T14:45:36Z","timestamp":1730213136473,"version":"3.28.0"},"reference-count":67,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvpr52729.2023.02241","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T17:30:52Z","timestamp":1692725452000},"page":"23401-23411","source":"Crossref","is-referenced-by-count":6,"title":["LANIT: Language-Driven Image-to-Image Translation for Unlabeled Data"],"prefix":"10.1109","author":[{"given":"Jihye","family":"Park","sequence":"first","affiliation":[{"name":"Korea University,Seoul,Korea"}]},{"given":"Sunwoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea University,Seoul,Korea"}]},{"given":"Soohyun","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea University,Seoul,Korea"}]},{"given":"Seokju","family":"Cho","sequence":"additional","affiliation":[{"name":"Korea University,Seoul,Korea"}]},{"given":"Jaejun","family":"Yoo","sequence":"additional","affiliation":[{"name":"UNIST,Ulsan,Korea"}]},{"given":"Youngjung","family":"Uh","sequence":"additional","affiliation":[{"name":"Yonsei University,Seoul,Korea"}]},{"given":"Seungryong","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea University,Seoul,Korea"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Domain adaptation via prompt learning","author":"ge","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref57","first-page":"5914","article-title":"Relgan: Multi-domain image-to-image translation via relative attributes","author":"wu","year":"2019","journal-title":"ICCV"},{"key":"ref12","article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","author":"gal","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref56","article-title":"Hair-clip: Design your hair by text and reference image","author":"wei","year":"2021","journal-title":"ar Xiv preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref59","article-title":"Cpt: Colorful prompt tuning for pre-trained vision-language models","author":"yao","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref14","article-title":"Image-to-image translation for cross-domain dis-entanglement","author":"gonzalez-garcia","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref58","article-title":"Diversity-sensitive conditional gen-erative adversarial networks","author":"yang","year":"2019","journal-title":"ICLRE"},{"key":"ref53","article-title":"Test-time prompt tuning for zero-shot generalization in vision-language models","author":"shu","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"ref11","article-title":"Scaling-up dis-entanglement for image translation","author":"gabbay","year":"2021","journal-title":"ar Xiv preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00451"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"ref54","first-page":"14144","article-title":"Aligning latent and image spaces to connect the un-connectable","author":"skorokhodov","year":"2021","journal-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision"},{"key":"ref17","first-page":"6626","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilib-rium","author":"heusel","year":"2017","journal-title":"NeurIPS"},{"key":"ref16","article-title":"Prompt-to-prompt im-age editing with cross attention control","author":"hertz","year":"2022","journal-title":"ar Xiv preprint"},{"key":"ref19","first-page":"1989","article-title":"Cycada: Cycle-consistent adversarial domain adaptation","author":"hoffman","year":"2018","journal-title":"ICML"},{"key":"ref18","article-title":"Denoising diffusion probabilistic models","author":"ho","year":"2020","journal-title":"Arxiv preprint arXiv"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_23"},{"key":"ref50","article-title":"Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation","author":"ruiz","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref46","first-page":"319","article-title":"Contrastive learning for unpaired image-to-image translation","author":"park","year":"2020","journal-title":"ECCV"},{"key":"ref45","article-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models","author":"nichol","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref48","article-title":"Language models as know ledge bases?","author":"petroni","year":"2017","journal-title":"ar Xiv preprint"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"ref42","first-page":"4183","article-title":"High-fidelity im-age generation with fewer labels","author":"lu?i?","year":"2019","journal-title":"ICML"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"ref44","first-page":"7176","article-title":"Reliable fidelity and diversity metrics for generative models","author":"naeem","year":"2020","journal-title":"ICML"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00152"},{"key":"ref49","first-page":"8748","article-title":"Learning transferable visual models from natural language super-vision","author":"radford","year":"2021","journal-title":"ICML"},{"key":"ref8","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"brown","year":"2020","journal-title":"NeurIPS"},{"key":"ref7","first-page":"446","article-title":"Food-101-mining discriminative components with random forests","author":"bossard","year":"2014","journal-title":"ECCV"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00916"},{"key":"ref4","first-page":"5821","article-title":"Exploring unlabeled faces for novel attribute discov-ery","author":"bahng","year":"2020","journal-title":"CVPR"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01389"},{"key":"ref6","first-page":"4787","article-title":"Dunit: Detection-based unsuper-vised image-to-image translation","author":"bhattacharjee","year":"2020","journal-title":"CVPR"},{"key":"ref5","first-page":"707","article-title":"Text21ive: Text-driven layered image and video editing","author":"bar-tal","year":"2022","journal-title":"European Conference on Computer Vision"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01064"},{"key":"ref35","article-title":"Few-shot un-sueprvised image-to-image translation","author":"liu","year":"2019","journal-title":"ArXiv"},{"key":"ref34","first-page":"700","article-title":"Unsupervised image-to-image translation networks","author":"liu","year":"2017","journal-title":"NeurIPS"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01429"},{"key":"ref36","article-title":"Pre-train, prompt, and predict: A systematic survey of prompting methods in nat-ural language processing","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref31","first-page":"35","article-title":"Diverse image-to-image translation via disentangled representations","author":"lee","year":"2018","journal-title":"ECCV"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01778"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"ref32","article-title":"Ordinalclip: Learning rank prompts for language-guided ordinal regression","author":"li","year":"2022","journal-title":"ar Xiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"ref1","article-title":"Clip2stylegan: Unsupervised extraction of stylegan edit directions","author":"abdal","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref39","article-title":"More control for free! image synthesis with semantic diffusion guidance","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref38","article-title":"Fusedream: Training-free text-to-image generation with improved clip+ gan space op-timization","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref24","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"jia","year":"2021","journal-title":"ICML"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00649"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref25","first-page":"105","article-title":"Prompting visual-language models for efficient video understanding","author":"ju","year":"2022","journal-title":"European Conference on Computer Vision"},{"key":"ref20","article-title":"Unsupervised prompt learning for vision-language models","author":"huang","year":"2022","journal-title":"ar Xiv preprint"},{"key":"ref64","article-title":"Learning to prompt for vision-language models","author":"zhou","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref63","article-title":"Prompt consis-tency for zero-shot task generalization","author":"zhou","year":"2022","journal-title":"ar Xiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref66","first-page":"11465","article-title":"Cocosnet v2: Full-resolution correspondence learning for image trans-lation","author":"zhou","year":"2021","journal-title":"CVPR"},{"key":"ref21","first-page":"172","article-title":"Multimodal unsupervised image-to-image translation","author":"huang","year":"2018","journal-title":"ECCV"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"journal-title":"Diffusionclip Text-guided image manipulation using diffusion models","year":"2021","author":"kim","key":"ref28"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01770"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.310"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01614"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00519"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2023,6,17]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10203037\/10203050\/10205308.pdf?arnumber=10205308","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,11]],"date-time":"2023-09-11T18:03:59Z","timestamp":1694455439000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10205308\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":67,"URL":"https:\/\/doi.org\/10.1109\/cvpr52729.2023.02241","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}