{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T12:29:51Z","timestamp":1743769791541,"version":"3.28.0"},"reference-count":59,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvpr52729.2023.00213","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T17:30:52Z","timestamp":1692725452000},"page":"2142-2152","source":"Crossref","is-referenced-by-count":47,"title":["MAGE: MAsked Generative Encoder to Unify Representation Learning and Image Synthesis"],"prefix":"10.1109","author":[{"given":"Tianhong","family":"Li","sequence":"first","affiliation":[{"name":"MIT CSAIL"}]},{"given":"Huiwen","family":"Chang","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Shlok Kumar","family":"Mishra","sequence":"additional","affiliation":[{"name":"University of Maryland"}]},{"given":"Han","family":"Zhang","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Dina","family":"Katabi","sequence":"additional","affiliation":[{"name":"MIT CSAIL"}]},{"given":"Dilip","family":"Krishnan","sequence":"additional","affiliation":[{"name":"Google Research"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"ref2","article-title":"Beit: Bert pre-training of image transformers","author":"Bao","year":"2021","journal-title":"arXiv preprint"},{"key":"ref3","article-title":"Large scale gan training for high fidelity natural image synthesis","author":"Brock","year":"2018","journal-title":"arXiv preprint"},{"key":"ref4","article-title":"Large scale GAN training for high fidelity natural image synthesis","volume-title":"Int. Conf. on Learning Representations (ICLR)","author":"Brock","year":"2019"},{"key":"ref5","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","author":"Caron","year":"2020","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01103"},{"key":"ref8","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020","journal-title":"arXiv preprint"},{"key":"ref9","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"icml","author":"Chen","year":"2020"},{"issue":"33","key":"ref10","article-title":"Big self-supervised models are strong semi-supervised learners","author":"Chen","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01852-4"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"ref14","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref15","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","article-title":"Ad-versarial feature learning","author":"Donahue","year":"2016","journal-title":"arXiv preprint"},{"issue":"32","key":"ref17","article-title":"Large scale adversarial representation learning","author":"Donahue","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25130"},{"key":"ref19","article-title":"An image is worth 16x16 words: Trans-formers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref20","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Int. Conf. on Learning Representations (ICLR)","author":"Dosovitskiy","year":"2021"},{"key":"ref21","article-title":"Adversarially learned inference","author":"Dumoulin","year":"2016","journal-title":"ar Xiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref23","article-title":"Unsupervised representation learning by predicting image rotations","author":"Gidaris","year":"2018","journal-title":"arXiv preprint"},{"journal-title":"Generative adversarial nets","year":"2014","author":"Goodfellow","key":"ref24"},{"key":"ref25","article-title":"Bootstrap your own latent: A new approach to self-supervised learning","author":"Grill","year":"2020","journal-title":"arXiv preprint"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"issue":"30","key":"ref29","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","author":"Heusel","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref30","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3336525"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01123"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00679"},{"key":"ref35","article-title":"Making contrastive learning robust to shortcuts","author":"Li","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25252"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01429"},{"key":"ref38","first-page":"4183","article-title":"High-fidelity image generation with fewer labels","volume-title":"International conference on machine learning","author":"Lucic","year":"2019"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref40","article-title":"Repre-sentation learning with contrastive predictive coding","author":"van den Oord","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"ref42","article-title":"Beit v2: Masked image modeling with vector-quantized visual tokenizers","author":"Peng","year":"2022","journal-title":"arXiv preprint"},{"key":"ref43","article-title":"Hierarchical text-conditional image generation with clip latents","author":"Ramesh","year":"2022","journal-title":"arXiv preprint"},{"key":"ref44","article-title":"Generating diverse high-fidelity images with vq-vae-2","author":"Razavi","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref45","first-page":"4974","article-title":"Can contrastive learning avoid shortcut solutions?","volume":"34","author":"Robinson","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref48","article-title":"Improved techniques for training gans","author":"Salimans","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref49","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"Int. Conf. on Learning Representations (ICLR)","author":"Song","year":"2021"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"ref51","article-title":"Neural discrete representation learning","author":"van den Oord","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_20"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00441"},{"key":"ref55","article-title":"Vector-quantized image modeling with improved vqgan","author":"Yu","year":"2021","journal-title":"arXiv preprint"},{"key":"ref56","first-page":"7354","article-title":"Self-attention generative adversarial networks","volume-title":"Int. Conference on Machine Learning (ICML)","author":"Zhang","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"ref58","first-page":"18367","article-title":"Improved transformer for high-resolution gans","volume":"34","author":"Zhao","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref59","article-title":"ibot: Image bert pre-training with online tokenizer","author":"Zhou","year":"2021","journal-title":"arXiv preprint"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2023,6,17]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10203037\/10203050\/10203945.pdf?arnumber=10203945","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T02:32:06Z","timestamp":1705026726000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10203945\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/cvpr52729.2023.00213","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}