{"id":"https://openalex.org/W4386071584","doi":"https://doi.org/10.1109/cvpr52729.2023.00213","title":"MAGE: MAsked Generative Encoder to Unify Representation Learning and Image Synthesis","display_name":"MAGE: MAsked Generative Encoder to Unify Representation Learning and Image Synthesis","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4386071584","doi":"https://doi.org/10.1109/cvpr52729.2023.00213"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00213","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.09117","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077391276","display_name":"Tianhong Li","orcid":"https://orcid.org/0000-0001-7422-3520"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianhong Li","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067538672","display_name":"Hui\u2010Wen Chang","orcid":"https://orcid.org/0000-0001-8877-1886"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huiwen Chang","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101547831","display_name":"Shlok Mishra","orcid":"https://orcid.org/0000-0003-2492-8762"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"funder","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shlok Kumar Mishra","raw_affiliation_strings":["University of Maryland"],"affiliations":[{"raw_affiliation_string":"University of Maryland","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100399257","display_name":"Han Zhang","orcid":"https://orcid.org/0000-0001-7072-2189"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Zhang","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052959289","display_name":"Dina Katabi","orcid":"https://orcid.org/0000-0003-4854-4157"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dina Katabi","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103083233","display_name":"Dilip Krishnan","orcid":"https://orcid.org/0000-0002-9183-2886"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dilip Krishnan","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":10.689,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.99996,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.65537846},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.61204815},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature Learning","score":0.6080819},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46437067}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80286574},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7121714},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.7066637},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.65537846},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.61204815},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.6080819},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5668425},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.54066145},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.51651806},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46437067},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43059695},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42934385},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3878341},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.060569823},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00213","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.09117","pdf_url":"https://arxiv.org/pdf/2211.09117","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.09117","pdf_url":"https://arxiv.org/pdf/2211.09117","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":52,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2321533354","https://openalex.org/W2785325870","https://openalex.org/W2896457183","https://openalex.org/W2952716587","https://openalex.org/W2953469440","https://openalex.org/W2962770929","https://openalex.org/W2962808998","https://openalex.org/W2963373786","https://openalex.org/W2963420272","https://openalex.org/W2963799213","https://openalex.org/W2964024144","https://openalex.org/W2971074500","https://openalex.org/W3005680577","https://openalex.org/W3034523045","https://openalex.org/W3035060554","https://openalex.org/W3035524453","https://openalex.org/W3036167779","https://openalex.org/W3036224891","https://openalex.org/W3094502228","https://openalex.org/W3100859887","https://openalex.org/W3108655343","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W3162926177","https://openalex.org/W3170863103","https://openalex.org/W3171007011","https://openalex.org/W3180268133","https://openalex.org/W3180355996","https://openalex.org/W3189743387","https://openalex.org/W3206395542","https://openalex.org/W3213665720","https://openalex.org/W3213836217","https://openalex.org/W4224035735","https://openalex.org/W4287115658","https://openalex.org/W4294568686","https://openalex.org/W4297808394","https://openalex.org/W4301206121","https://openalex.org/W4301914798","https://openalex.org/W4312312750","https://openalex.org/W4312866014","https://openalex.org/W4312891522","https://openalex.org/W4312933868","https://openalex.org/W4312974539","https://openalex.org/W4313021454","https://openalex.org/W4313156423","https://openalex.org/W4313158203","https://openalex.org/W4320013936","https://openalex.org/W4382465386","https://openalex.org/W4382467347","https://openalex.org/W4386221015","https://openalex.org/W4389104669"],"related_works":["https://openalex.org/W4387506531","https://openalex.org/W4380551139","https://openalex.org/W4365211920","https://openalex.org/W4317695495","https://openalex.org/W4283803360","https://openalex.org/W4238433571","https://openalex.org/W3174044702","https://openalex.org/W3014948380","https://openalex.org/W2967848559","https://openalex.org/W2280377497"],"abstract_inverted_index":{"Generative":[0,44],"modeling":[1,71],"and":[2,30,34,55,81,108,134,157,171],"representation":[3,57,82,119,135,172],"learning":[4,83,136],"are":[5,15],"two":[6],"key":[7,60],"tasks":[8],"in":[9,68,150,167],"computer":[10],"vision.":[11],"However,":[12],"these":[13],"models":[14],"typically":[16],"trained":[17],"independently,":[18],"which":[19],"ignores":[20],"the":[21,28,47,88,118,126,132,151],"potential":[22],"for":[23,161],"each":[24],"task":[25,152],"to":[26,32,50,125],"help":[27],"other,":[29],"leads":[31],"training":[33,76,90],"model":[35,146],"maintenance":[36],"overheads.":[37],"In":[38],"this":[39,111],"work,":[40],"we":[41],"propose":[42],"MAsked":[43],"Encoder":[45],"(MAGE),":[46],"first":[48],"framework":[49],"unify":[51],"SOTA":[52],"image":[53,70,155,169],"generation":[54,133,156,170],"self-supervised":[56],"learning.":[58,173],"Our":[59],"insight":[61],"is":[62,175],"that":[63],"using":[64],"variable":[65],"masking":[66,79,85],"ratios":[67],"masked":[69],"pre-training":[72],"can":[73,115],"allow":[74],"generative":[75,95],"(very":[77],"high":[78],"ratio)":[80,86],"(lower":[84],"under":[87],"same":[89],"framework.":[91],"Inspired":[92],"by":[93,102,120],"previous":[94],"models,":[96],"MAGE":[97,144],"uses":[98],"semantic":[99],"tokens":[100],"learned":[101],"a":[103,122,142],"vector-quantized":[104],"GAN":[105],"at":[106,177],"inputs":[107],"outputs,":[109],"combining":[110],"with":[112],"masking.":[113],"We":[114,129],"further":[116],"improve":[117],"adding":[121],"contrastive":[123],"loss":[124],"encoder":[127],"output.":[128],"extensively":[130],"evaluate":[131],"capabilities":[137],"of":[138,153],"MAGE.":[139],"On":[140],"ImageNet-1K,":[141],"single":[143],"ViT-L":[145],"obtains":[147],"9.10":[148],"FID":[149],"class-unconditional":[154],"78.9%":[158],"top-1":[159],"accuracy":[160],"linear":[162],"probing,":[163],"achieving":[164],"state-of-the-art":[165],"performance":[166],"both":[168],"Code":[174],"available":[176],"https://github.com/LTHl4/mage.":[178]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386071584","counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":37},{"year":2023,"cited_by_count":6}],"updated_date":"2025-04-27T21:11:43.377100","created_date":"2023-08-23"}