{"id":"https://openalex.org/W4361865933","doi":"https://doi.org/10.48550/arxiv.2303.17189","title":"LayoutDiffusion: Controllable Diffusion Model for Layout-to-image Generation","display_name":"LayoutDiffusion: Controllable Diffusion Model for Layout-to-image Generation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4361865933","doi":"https://doi.org/10.48550/arxiv.2303.17189"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.17189","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2303.17189","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088316957","display_name":"Guangcong Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Guangcong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039357854","display_name":"Xianpan Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xianpan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053246660","display_name":"Xuewei Li","orcid":"https://orcid.org/0000-0001-5336-7234"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xuewei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101500719","display_name":"Zhongang Qi","orcid":"https://orcid.org/0000-0001-8298-4063"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi, Zhongang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102004349","display_name":"Ying Shan","orcid":"https://orcid.org/0000-0001-7673-8325"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Ying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100407758","display_name":"Xi Li","orcid":"https://orcid.org/0000-0003-3023-1662"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9938,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9915,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.55160606},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.49902296},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.4484527}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7735174},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6747851},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.62477565},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.5596976},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5562885},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.55160606},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.53854007},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.5299348},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5099393},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.49902296},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.4484527},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42320356},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09894541},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.090901196},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.17189","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2303.17189","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.17189","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4214649316","https://openalex.org/W4205698120","https://openalex.org/W2561315646","https://openalex.org/W2554790198","https://openalex.org/W2391745328","https://openalex.org/W2332386680","https://openalex.org/W2248621902","https://openalex.org/W2095903272","https://openalex.org/W2036697162","https://openalex.org/W2003779889"],"abstract_inverted_index":{"Recently,":[0],"diffusion":[1,54],"models":[2],"have":[3],"achieved":[4],"great":[5],"success":[6],"in":[7,107],"image":[8,20,78,87,96],"synthesis.":[9],"However,":[10],"when":[11],"it":[12],"comes":[13],"to":[14,30,83,101,123,132],"the":[15,36,68,73,94,104,125,141,152],"layout-to-image":[16],"generation":[17,62],"where":[18],"an":[19],"often":[21],"has":[22],"a":[23,45,53,85,98,108],"complex":[24],"scene":[25],"of":[26,77],"multiple":[27,128],"objects,":[28],"how":[29],"make":[31],"strong":[32],"control":[33],"over":[34],"both":[35],"global":[37],"layout":[38,100,106],"map":[39],"and":[40,64,79,92,116,130,135,165],"each":[41],"detailed":[42],"object":[43],"remains":[44],"challenging":[46],"task.":[47],"In":[48],"this":[49],"paper,":[50],"we":[51,81],"propose":[52,82],"model":[55,124],"named":[56],"LayoutDiffusion":[57,150],"that":[58,148],"can":[59],"obtain":[60],"higher":[61],"quality":[63],"greater":[65],"controllability":[66],"than":[67],"previous":[69,153],"works.":[70],"To":[71],"overcome":[72],"difficult":[74],"multimodal":[75],"fusion":[76],"layout,":[80],"construct":[84],"structural":[86],"patch":[88],"with":[89,103],"region":[90],"information":[91],"transform":[93],"patched":[95],"into":[97],"special":[99],"fuse":[102],"normal":[105],"unified":[109],"form.":[110],"Moreover,":[111],"Layout":[112],"Fusion":[113],"Module":[114],"(LFM)":[115],"Object-aware":[117],"Cross":[118],"Attention":[119],"(OaCA)":[120],"are":[121],"proposed":[122],"relationship":[126],"among":[127],"objects":[129],"designed":[131],"be":[133],"object-aware":[134],"position-sensitive,":[136],"allowing":[137],"for":[138],"precisely":[139],"controlling":[140],"spatial":[142],"related":[143],"information.":[144],"Extensive":[145],"experiments":[146],"show":[147],"our":[149],"outperforms":[151],"SOTA":[154],"methods":[155],"on":[156,163,168],"FID,":[157],"CAS":[158],"by":[159],"relatively":[160],"46.35%,":[161],"26.70%":[162],"COCO-stuff":[164],"44.29%,":[166],"41.82%":[167],"VG.":[169],"Code":[170],"is":[171],"available":[172],"at":[173],"https://github.com/ZGCTroy/LayoutDiffusion.":[174]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4361865933","counts_by_year":[],"updated_date":"2025-01-08T22:11:14.789417","created_date":"2023-04-05"}