{"id":"https://openalex.org/W4386113868","doi":"https://doi.org/10.48550/arxiv.2308.10997","title":"MarkovGen: Structured Prediction for Efficient Text-to-Image Generation","display_name":"MarkovGen: Structured Prediction for Efficient Text-to-Image Generation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386113868","doi":"https://doi.org/10.48550/arxiv.2308.10997"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.10997","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2308.10997","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075594276","display_name":"Sadeep Jayasumana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jayasumana, Sadeep","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001667895","display_name":"Daniel Gl\u00e4sner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Glasner, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101811303","display_name":"Srikumar Ramalingam","orcid":"https://orcid.org/0000-0002-2844-4119"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramalingam, Srikumar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081627147","display_name":"Andreas Veit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veit, Andreas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050566186","display_name":"Ayan Chakrabarti","orcid":"https://orcid.org/0000-0002-4843-740X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakrabarti, Ayan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018301052","display_name":"Sanjiv Kumar","orcid":"https://orcid.org/0000-0002-4080-1414"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Sanjiv","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9895,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9895,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-random-field","display_name":"Markov random field","score":0.8359828}],"concepts":[{"id":"https://openalex.org/C2778045648","wikidata":"https://www.wikidata.org/wiki/Q176827","display_name":"Markov random field","level":4,"score":0.8359828},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7479439},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6701746},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.64415514},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.529166},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48109668},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4190932},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41810977},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3784463},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.22980064}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.10997","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2308.10997","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.10997","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4321636575","https://openalex.org/W4280588203","https://openalex.org/W2741131631","https://openalex.org/W2357796999","https://openalex.org/W2156919374","https://openalex.org/W2055243143","https://openalex.org/W2045526782","https://openalex.org/W1986418932","https://openalex.org/W1984019423","https://openalex.org/W1483472507"],"abstract_inverted_index":{"Modern":[0],"text-to-image":[1,107],"generation":[2],"models":[3,29],"produce":[4,177],"high-quality":[5],"images":[6,180],"that":[7,48],"are":[8,30,54],"both":[9,170],"photorealistic":[10],"and":[11,32,125,142,176],"faithful":[12],"to":[13,46,76,122,169],"the":[14,52,59,95,103,113,128,137],"text":[15,60],"prompts.":[16],"However,":[17],"this":[18,69,78,98,165],"quality":[19,124,179],"comes":[20],"at":[21,118],"significant":[22],"computational":[23],"cost:":[24],"nearly":[25],"all":[26],"of":[27,51,83,97,102,131],"these":[28],"iterative":[31,42],"require":[33],"running":[34],"sampling":[35,133],"multiple":[36],"times":[37],"with":[38,58,65,136],"large":[39],"models.":[40],"This":[41],"process":[43],"is":[44,139],"needed":[45],"ensure":[47],"different":[49,81,119],"regions":[50,82],"image":[53,116,184],"not":[55],"only":[56],"aligned":[57],"prompt,":[61],"but":[62],"also":[63],"compatible":[64],"each":[66],"other.":[67],"In":[68],"work,":[70],"we":[71],"propose":[72],"a":[73,87,156],"light-weight":[74],"approach":[75],"achieving":[77],"compatibility":[79,114],"between":[80],"an":[84],"image,":[85],"using":[86],"Markov":[88],"Random":[89],"Field":[90],"(MRF)":[91],"model.":[92,108],"We":[93],"demonstrate":[94],"effectiveness":[96],"method":[99],"on":[100],"top":[101],"latent":[104],"token-based":[105],"Muse":[106,132,173],"The":[109],"MRF":[110,138,153,167],"richly":[111],"encodes":[112],"among":[115],"tokens":[117],"spatial":[120],"locations":[121],"improve":[123],"significantly":[126,140],"reduce":[127],"required":[129],"number":[130],"steps.":[134],"Inference":[135],"cheaper,":[141],"its":[143],"parameters":[144],"can":[145],"be":[146],"quickly":[147],"learned":[148],"through":[149],"back-propagation":[150],"by":[151,174,181],"modeling":[152],"inference":[154],"as":[155],"differentiable":[157],"neural-network":[158],"layer.":[159],"Our":[160],"full":[161],"model,":[162],"MarkovGen,":[163],"uses":[164],"proposed":[166],"model":[168],"speed":[171],"up":[172],"1.5X":[175],"higher":[178],"decreasing":[182],"undesirable":[183],"artifacts.":[185]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386113868","counts_by_year":[],"updated_date":"2025-04-14T13:42:06.052388","created_date":"2023-08-24"}