{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,5]],"date-time":"2024-11-05T05:03:55Z","timestamp":1730783035762,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","funder":[{"name":"Institute of Information & communica- tions Technology Planning & Evaluation (IITP)","award":["No.2022-0-00608"]},{"name":"China Scholarship Council","award":["No.202006210057"]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["No.202208060266"],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681252","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"7143-7151","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["TextGaze: Gaze-Controllable Face Generation with Natural Language"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0009-0004-0043-8480","authenticated-orcid":false,"given":"Hengfei","family":"Wang","sequence":"first","affiliation":[{"name":"University of Birmingham, Birmingham, United Kingdom"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-0884-8711","authenticated-orcid":false,"given":"Zhongqun","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Birmingham, Birmingham, United Kingdom"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-1353-9817","authenticated-orcid":false,"given":"Yihua","family":"Cheng","sequence":"additional","affiliation":[{"name":"University of Birmingham, Birmingham, United Kingdom"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-7495-9677","authenticated-orcid":false,"given":"Hyung Jin","family":"Chang","sequence":"additional","affiliation":[{"name":"University of Birmingham, Birmingham, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"volume-title":"Adaptive Feature Fusion Network for Gaze Tracking in Mobile Tablets. In International Conference on Pattern Recognition (ICPR).","year":"2020","author":"Bao Yiwei","key":"e_1_3_2_1_1_1","unstructured":"Yiwei Bao, Yihua Cheng, Yunfei Liu, and Feng Lu. 2020. Adaptive Feature Fusion Network for Gaze Tracking in Mobile Tablets. In International Conference on Pattern Recognition (ICPR)."},{"volume-title":"Computer Graphics Forum","author":"Blanz Volker","key":"e_1_3_2_1_2_1","unstructured":"Volker Blanz, Kristina Scherbaum, Thomas Vetter, and Hans-Peter Seidel. 2004. Exchanging faces in images. In Computer Graphics Forum, Vol. 23. Wiley Online Library, 669--676."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.116"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01886"},{"volume-title":"Appearance-based Gaze Estimation With Deep Learning: A Review and Benchmark","year":"2021","author":"Cheng Yihua","key":"e_1_3_2_1_5_1","unstructured":"Yihua Cheng, Haofei Wang, Yiwei Bao, and Feng Lu. 2021. Appearance-based Gaze Estimation With Deep Learning: A Review and Benchmark. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_6"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Delmas Ginger and Weinzaepfel Philippe and Lucas Thomas and Moreno-Noguer Francesc and Rogez Gr\u00e9gory. 2022. PoseScript: 3D Human Poses from Natural Language. In ECCV.","DOI":"10.1007\/978-3-031-20068-7_20"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.341"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"volume-title":"Generative adversarial nets. Advances in neural information processing systems","year":"2014","author":"Goodfellow Ian","key":"e_1_3_2_1_10_1","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"volume-title":"Photo-Realistic Monocular Gaze Redirection Using Generative Adversarial Networks. In The IEEE International Conference on Computer Vision.","year":"2019","author":"He Zhe","key":"e_1_3_2_1_13_1","unstructured":"Zhe He, Adrian Spurr, Xucong Zhang, and Otmar Hilliges. 2019. Photo-Realistic Monocular Gaze Redirection Using Generative Adversarial Networks. In The IEEE International Conference on Computer Vision."},{"volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","year":"2017","author":"Heusel Martin","key":"e_1_3_2_1_14_1","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3160360"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cub.2015.05.052"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"volume-title":"Photorealistic monocular gaze redirection using machine learning","year":"2017","author":"Kononenko Daniil","key":"e_1_3_2_1_18_1","unstructured":"Daniil Kononenko, Yaroslav Ganin, Diana Sungatullina, and Victor Lempitsky. 2017. Photorealistic monocular gaze redirection using machine learning. IEEE transactions on pattern analysis and machine intelligence, Vol. 40, 11 (2017), 2696--2710."},{"volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML.","year":"2022","author":"Li Junnan","key":"e_1_3_2_1_19_1","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_7"},{"volume-title":"Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784","year":"2014","author":"Mirza Mehdi","key":"e_1_3_2_1_22_1","unstructured":"Mehdi Mirza and Simon Osindero. 2014. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigMM.2019.00-42"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3204493.3204545"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00078"},{"volume-title":"International conference on machine learning. PMLR, 8748--8763","year":"2021","author":"Radford Alec","key":"e_1_3_2_1_26_1","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","year":"2022","author":"Ramesh Aditya","key":"e_1_3_2_1_27_1","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, Vol. 1, 2 (2022), 3."},{"volume-title":"International conference on machine learning. PMLR, 1060--1069","year":"2016","author":"Reed Scott","key":"e_1_3_2_1_28_1","unstructured":"Scott Reed, Zeynep Akata, Xinchen Yan, Lajanugen Logeswaran, Bernt Schiele, and Honglak Lee. 2016. Generative adversarial text to image synthesis. In International conference on machine learning. PMLR, 1060--1069."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00933"},{"key":"e_1_3_2_1_31_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems, Vol. 35 (2022), 36479--36494.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Improved techniques for training gans. Advances in neural information processing systems","year":"2016","author":"Salimans Tim","key":"e_1_3_2_1_32_1","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. 2016. Improved techniques for training gans. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01813"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475391"},{"volume-title":"European Conference on Computer Vision. Springer, 568--584","year":"2022","author":"Elden Tse Tze Ho","key":"e_1_3_2_1_35_1","unstructured":"Tze Ho Elden Tse, Zhongqun Zhang, Kwang In Kim, Ales Leonardis, Feng Zheng, and Hyung Jin Chang. 2022. S 2 Contact: Graph-Based Network for 3D Hand-Object Contact Estimation with Semi-supervised Learning. In European Conference on Computer Vision. Springer, 568--584."},{"volume-title":"High-fidelity eye animatable neural radiance fields for human face. BMVC","year":"2023","author":"Wang Hengfei","key":"e_1_3_2_1_36_1","unstructured":"Hengfei Wang, Zhongqun Zhang, Yihua Cheng, and Hyung Jin Chang. 2023. High-fidelity eye animatable neural radiance fields for human face. BMVC (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"volume-title":"Recursive-NeRF: An efficient and dynamically growing NeRF","year":"2022","author":"Yang Guo-Wei","key":"e_1_3_2_1_38_1","unstructured":"Guo-Wei Yang, Wen-Yang Zhou, Hao-Yang Peng, Dun Liang, Tai-Jiang Mu, and Shi-Min Hu. 2022. Recursive-NeRF: An efficient and dynamically growing NeRF. IEEE Transactions on Visualization and Computer Graphics (2022)."},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","year":"2024","author":"Hongquan Hao Yongwei Liu Zongji Wang","key":"e_1_3_2_1_39_1","unstructured":"Zongji Wang Hongquan Hao Yongwei Liu Shiqing Cheng Xi Wang Hyung Jin Chang Yihua Cheng, Yaning Zhu. 2024. What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01221"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58558-7_22"},{"volume-title":"European Conference on Computer Vision. Springer, 112--128","year":"2022","author":"Zhang Zhongqun","key":"e_1_3_2_1_44_1","unstructured":"Zhongqun Zhang, Wei Chen, Linfang Zheng, Alevs Leonardis, and Hyung Jin Chang. 2022. Trans6D: Transformer-Based 6D Object Pose Estimation and Refinement. In European Conference on Computer Vision. Springer, 112--128."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01646"},{"volume-title":"Shalini De Mello, and Otmar Hilliges","year":"2020","author":"Zheng Yufeng","key":"e_1_3_2_1_46_1","unstructured":"Yufeng Zheng, Seonwook Park, Xucong Zhang, Shalini De Mello, and Otmar Hilliges. 2020. Self-Learning Transformations for Improving Gaze and Head Redirection. Advances in Neural Information Processing Systems (2020)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681252","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T04:51:42Z","timestamp":1730695902000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681252"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":46,"alternative-id":["10.1145\/3664647.3681252","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681252","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}